Posted to dev@pinot.apache.org by GitBox <gi...@apache.org> on 2019/01/11 23:39:19 UTC

[GitHub] apucher closed pull request #3678: [TE] meta - rename base package to match new apache pinot convention

apucher closed pull request #3678: [TE] meta - rename base package to match new apache pinot convention
URL: https://github.com/apache/incubator-pinot/pull/3678
 
 
   

This is a PR merged from a forked repository. Because GitHub hides the original diff of a foreign (forked) pull request once it is merged, the diff is reproduced below for the sake of provenance:

diff --git a/pom.xml b/pom.xml
index 16cb23c0e9..a28205b2d0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1045,6 +1045,7 @@
             <exclude>thirdeye/**/*</exclude>
             <!-- Documentation files -->
             <exclude>**/docs/_build/**</exclude>
+            <exclude>**/?*</exclude>
           </excludes>
           <mapping>
             <thrift>JAVADOC_STYLE</thrift>
diff --git a/thirdeye/pom.xml b/thirdeye/pom.xml
index 7a0a2c87b5..d8f8080e36 100644
--- a/thirdeye/pom.xml
+++ b/thirdeye/pom.xml
@@ -20,7 +20,7 @@
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0                              http://maven.apache.org/maven-v4_0_0.xsd">
   <modelVersion>4.0.0</modelVersion>
-  <groupId>com.linkedin.thirdeye</groupId>
+  <groupId>org.apache.pinot.thirdeye</groupId>
   <artifactId>thirdeye</artifactId>
   <version>1.0-SNAPSHOT</version>
   <packaging>pom</packaging>
@@ -80,6 +80,7 @@
     <kafka.clients.version>0.10.2.1</kafka.clients.version>
     <swagger.version>0.7.0</swagger.version>
     <calendar-api.version>v3-rev300-1.23.0</calendar-api.version>
+    <json.version>20170516</json.version>
   </properties>
 
   <licenses>
@@ -124,7 +125,7 @@
   </developers>
 
   <scm>
-    <developerConnection>scm:git:git://git@github.com/linkedin/pinot.git</developerConnection>
+    <developerConnection>scm:git:git://git@github.com/apache/incubator-pinot.git</developerConnection>
   </scm>
 
   <inceptionYear>2014-2018</inceptionYear>
@@ -158,17 +159,17 @@
     <dependencies>
       <!-- project dependencies -->
       <dependency>
-        <groupId>com.linkedin.pinot</groupId>
+        <groupId>org.apache.pinot</groupId>
         <artifactId>pinot-api</artifactId>
         <version>${pinot.version}</version>
       </dependency>
       <dependency>
-        <groupId>com.linkedin.pinot</groupId>
+        <groupId>org.apache.pinot</groupId>
         <artifactId>pinot-common</artifactId>
         <version>${pinot.version}</version>
       </dependency>
       <dependency>
-        <groupId>com.linkedin.pinot</groupId>
+        <groupId>org.apache.pinot</groupId>
         <artifactId>pinot-core</artifactId>
         <version>${pinot.version}</version>
       </dependency>
@@ -461,6 +462,12 @@
         <artifactId>hsqldb</artifactId>
         <version>${hsqldb.version}</version>
       </dependency>
+      <dependency>
+        <!-- TODO: replace code dependencies and remove -->
+        <groupId>org.json</groupId>
+        <artifactId>json</artifactId>
+        <version>${json.version}</version>
+      </dependency>
 
       <!-- swagger -->
       <dependency>
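
For Java code that depends on these artifacts, the groupId change above goes together with the base package rename named in the PR title. As a minimal, hypothetical sketch (the consumer class below is not part of the PR), only the import prefix changes:

    // Hypothetical downstream class, shown only to illustrate the package rename.
    // Before: import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
    import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;

    public class ExampleThirdEyeConsumer {
      // The class itself is unchanged; only the package prefix moves from
      // com.linkedin.thirdeye to org.apache.pinot.thirdeye.
      private ThirdEyeConfig config;
    }
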
diff --git a/thirdeye/thirdeye-frontend/app/pods/auto-onboard/controller.js b/thirdeye/thirdeye-frontend/app/pods/auto-onboard/controller.js
index a2b6befc34..4f0657f941 100644
--- a/thirdeye/thirdeye-frontend/app/pods/auto-onboard/controller.js
+++ b/thirdeye/thirdeye-frontend/app/pods/auto-onboard/controller.js
@@ -208,8 +208,8 @@ export default Controller.extend({
           return;
         }
         const detectionConfig = {
-          className: 'com.linkedin.thirdeye.detection.algorithm.DimensionWrapper', nested: [{
-            className: 'com.linkedin.thirdeye.detection.algorithm.MovingWindowAlgorithm',
+          className: 'org.apache.pinot.thirdeye.detection.algorithm.DimensionWrapper', nested: [{
+            className: 'org.apache.pinot.thirdeye.detection.algorithm.MovingWindowAlgorithm',
             baselineWeeks: 4,
             windowSize: '4 weeks',
             changeDuration: '7d',
@@ -242,7 +242,7 @@ export default Controller.extend({
 
       const configResult = {
         "cron": "45 10/15 * * * ? *", "name": get(this, 'detectionConfigName'), "lastTimestamp": 0, "properties": {
-          "className": "com.linkedin.thirdeye.detection.algorithm.MergeWrapper",
+          "className": "org.apache.pinot.thirdeye.detection.algorithm.MergeWrapper",
           "maxGap": 7200000,
           "nested": nestedProperties,
           "datasetName": get(this, 'datasetName')
diff --git a/thirdeye/thirdeye-frontend/pom.xml b/thirdeye/thirdeye-frontend/pom.xml
index 206873178b..c8ac79c534 100644
--- a/thirdeye/thirdeye-frontend/pom.xml
+++ b/thirdeye/thirdeye-frontend/pom.xml
@@ -4,11 +4,11 @@
   xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   <modelVersion>4.0.0</modelVersion>
   <parent>
-    <groupId>com.linkedin.thirdeye</groupId>
+    <groupId>org.apache.pinot.thirdeye</groupId>
     <artifactId>thirdeye</artifactId>
     <version>1.0-SNAPSHOT</version>
   </parent>
-  <groupId>com.linkedin.thirdeye</groupId>
+  <groupId>org.apache.pinot.thirdeye</groupId>
   <artifactId>thirdeye-frontend</artifactId>
   <name>thirdeye-frontend</name>
   <url>http://maven.apache.org</url>
diff --git a/thirdeye/thirdeye-hadoop/pom.xml b/thirdeye/thirdeye-hadoop/pom.xml
index 66642a7be8..1c3561be01 100644
--- a/thirdeye/thirdeye-hadoop/pom.xml
+++ b/thirdeye/thirdeye-hadoop/pom.xml
@@ -4,7 +4,7 @@
     xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
     <modelVersion>4.0.0</modelVersion>
     <parent>
-        <groupId>com.linkedin.thirdeye</groupId>
+        <groupId>org.apache.pinot.thirdeye</groupId>
         <artifactId>thirdeye</artifactId>
         <version>1.0-SNAPSHOT</version>
     </parent>
@@ -18,7 +18,7 @@
 
     <dependencies>
         <dependency>
-            <groupId>com.linkedin.pinot</groupId>
+            <groupId>org.apache.pinot</groupId>
             <artifactId>pinot-core</artifactId>
             <exclusions>
                 <exclusion>
@@ -138,7 +138,7 @@
                             <transformers>
                                 <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                                 <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
-                                    <mainClass>com.linkedin.thirdeye.hadoop.ThirdEyeJob</mainClass>
+                                    <mainClass>org.apache.pinot.thirdeye.hadoop.ThirdEyeJob</mainClass>
                                 </transformer>
                             </transformers>
                             <relocations>
@@ -146,7 +146,7 @@
                                     <pattern>com.</pattern>
                                     <shadedPattern>thirdeye.com.</shadedPattern>
                                     <excludes>
-                                        <exclude>com.linkedin.thirdeye.**</exclude>
+                                        <exclude>org.apache.pinot.thirdeye.**</exclude>
                                     </excludes>
                                 </relocation>
                                 <relocation>
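
As an illustrative note on the shade configuration above (the mapping below is a sketch, not taken from the PR): for classes bundled into the shaded jar, the relocation rewrites names under the com. prefix into thirdeye.com., while the project's own classes, now under org.apache.pinot.thirdeye, are excluded and keep their names. A tiny self-contained demonstration of that renaming rule:

    import java.util.regex.Pattern;

    public class RelocationRuleSketch {
      public static void main(String[] args) {
        String pattern = "com.";                 // <pattern> from the shade config
        String shadedPattern = "thirdeye.com.";  // <shadedPattern> from the shade config
        // A third-party class name matches the pattern and is relocated:
        String thirdParty = "com.fasterxml.jackson.databind.ObjectMapper";
        System.out.println(thirdParty.replaceFirst("^" + Pattern.quote(pattern), shadedPattern));
        // The project's own classes are covered by the exclude and stay unchanged:
        System.out.println("org.apache.pinot.thirdeye.hadoop.ThirdEyeJob");
      }
    }
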
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJob.java
deleted file mode 100644
index 5f496dedc9..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJob.java
+++ /dev/null
@@ -1,455 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobStatus;
-import org.joda.time.DateTime;
-import org.joda.time.format.ISODateTimeFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseConstants;
-import com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob;
-import com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseConstants;
-import com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseJob;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants;
-import com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseJob;
-import com.linkedin.thirdeye.hadoop.join.JoinPhaseJob;
-import com.linkedin.thirdeye.hadoop.push.SegmentPushPhase;
-import com.linkedin.thirdeye.hadoop.push.SegmentPushPhaseConstants;
-import com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants;
-import com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseJob;
-import com.linkedin.thirdeye.hadoop.topk.TopKPhaseConstants;
-import com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob;
-import com.linkedin.thirdeye.hadoop.transform.TransformPhaseJob;
-import com.linkedin.thirdeye.hadoop.wait.WaitPhaseJob;
-
-/**
- * Wrapper to manage segment create and segment push jobs for thirdeye
- */
-public class ThirdEyeJob {
-  private static final Logger LOGGER = LoggerFactory.getLogger(ThirdEyeJob.class);
-
-  private static final String USAGE = "usage: phase_name job.properties";
-
-  private final String phaseName;
-  private final Properties inputConfig;
-
-  public ThirdEyeJob(String jobName, Properties config) {
-    String phaseFromConfig = config.getProperty(ThirdEyeJobProperties.THIRDEYE_PHASE.getName());
-    if (phaseFromConfig != null) {
-      this.phaseName = phaseFromConfig;
-    } else {
-      this.phaseName = jobName;
-    }
-    this.inputConfig = config;
-  }
-
-  private enum PhaseSpec {
-
-    BACKFILL {
-      @Override
-      Class<?> getKlazz() {
-        return BackfillPhaseJob.class;
-      }
-
-      @Override
-      String getDescription() {
-        return "Backfills older pinot segments with star tree index and topk information";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-          DateTime minTime, DateTime maxTime, String inputPaths)
-              throws Exception {
-        Properties config = new Properties();
-
-        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_CONTROLLER_HOST.toString(),
-            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_HOSTS.getName()));
-        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_CONTROLLER_PORT.toString(),
-            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_PORT.getName()));
-
-        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_START_TIME.toString(),
-            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_BACKFILL_START_TIME.getName()));
-        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_END_TIME.toString(),
-            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_BACKFILL_END_TIME.getName()));
-
-        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_OUTPUT_PATH.toString(),
-            getIndexDir(root, collection, minTime, maxTime) + File.separator + BACKFILL.getName());
-        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_TABLE_NAME.toString(), collection);
-
-        return config;
-      }
-    },
-    WAIT {
-      @Override
-      Class<?> getKlazz() {
-        return null;
-      }
-
-      @Override
-      String getDescription() {
-        return "Polls a pre-determined amount of time for the existence of input paths";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-          DateTime minTime, DateTime maxTime, String inputPaths)
-              throws Exception {
-        return null;
-      }
-    },
-    JOIN {
-      @Override
-      Class<?> getKlazz() {
-        return JoinPhaseJob.class;
-      }
-
-      @Override
-      String getDescription() {
-        return "Joins multiple data sets based on join key";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-          DateTime minTime, DateTime maxTime, String inputPaths) {
-        return inputConfig;
-      }
-    },
-    TRANSFORM {
-      @Override
-      Class<?> getKlazz() {
-        return TransformPhaseJob.class;
-      }
-
-      @Override
-      String getDescription() {
-        return "Transforms avro record";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-          DateTime minTime, DateTime maxTime, String inputPaths) {
-        return inputConfig;
-      }
-    },
-    AGGREGATION {
-      @Override
-      Class<?> getKlazz() {
-        return AggregationPhaseJob.class;
-      }
-
-      @Override
-      String getDescription() {
-        return "Aggregates input avro data to another time granularity";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-          DateTime minTime, DateTime maxTime, String inputPaths)
-              throws Exception {
-        Properties config = new Properties();
-
-        config.setProperty(AggregationPhaseConstants.AGG_PHASE_INPUT_PATH.toString(),
-            inputPaths);
-        config.setProperty(AggregationPhaseConstants.AGG_PHASE_OUTPUT_PATH.toString(),
-            getIndexDir(root, collection, minTime, maxTime) + File.separator
-                + AGGREGATION.getName());
-
-        return config;
-      }
-    },
-    TOPK {
-      @Override
-      Class<?> getKlazz() {
-        return TopKPhaseJob.class;
-      }
-
-      @Override
-      String getDescription() {
-        return "Topk";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-          DateTime minTime, DateTime maxTime, String inputPaths)
-              throws Exception {
-        Properties config = new Properties();
-
-        Path aggOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator + AGGREGATION.getName());
-        FileSystem fs = FileSystem.get(new Configuration());
-        if (fs.exists(aggOutputPath)) {
-          inputPaths = aggOutputPath.toString();
-        }
-        config.setProperty(TopKPhaseConstants.TOPK_PHASE_INPUT_PATH.toString(),
-            inputPaths);
-        config.setProperty(TopKPhaseConstants.TOPK_PHASE_OUTPUT_PATH.toString(),
-            getIndexDir(root, collection, minTime, maxTime) + File.separator
-                + TOPK.getName());
-
-        return config;
-      }
-    },
-    DERIVED_COLUMN_TRANSFORMATION {
-      @Override
-      Class<?> getKlazz() {
-        return DerivedColumnTransformationPhaseJob.class;
-      }
-
-      @Override
-      String getDescription() {
-        return "Adds new columns for dimensions with topk or whitelist";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-          DateTime minTime, DateTime maxTime, String inputPaths)
-              throws Exception {
-        Properties config = new Properties();
-
-        Path aggOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator + AGGREGATION.getName());
-        FileSystem fs = FileSystem.get(new Configuration());
-        if (fs.exists(aggOutputPath)) {
-          inputPaths = aggOutputPath.toString();
-        }
-        config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH.toString(),
-            inputPaths);
-        config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(),
-            getIndexDir(root, collection, minTime, maxTime));
-        config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH.toString(),
-            getIndexDir(root, collection, minTime, maxTime) + File.separator
-              + DERIVED_COLUMN_TRANSFORMATION.getName());
-        config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString(),
-            getIndexDir(root, collection, minTime, maxTime) + File.separator + TOPK.getName());
-
-        return config;
-      }
-    },
-    SEGMENT_CREATION {
-      @Override
-      Class<?> getKlazz() {
-        return SegmentCreationPhaseJob.class;
-      }
-
-      @Override
-      String getDescription() {
-        return "Generates pinot segments";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-          DateTime minTime, DateTime maxTime, String inputPaths)
-              throws Exception {
-        Properties config = new Properties();
-
-        Path derivedOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator +
-            DERIVED_COLUMN_TRANSFORMATION.getName());
-        Path aggregationOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator +
-            AGGREGATION.getName());
-        FileSystem fs = FileSystem.get(new Configuration());
-        if (fs.exists(derivedOutputPath)) {
-          inputPaths = derivedOutputPath.toString();
-        } else if (fs.exists(aggregationOutputPath)) {
-          inputPaths = aggregationOutputPath.toString();
-        }
-
-        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_INPUT_PATH.toString(), inputPaths);
-        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_OUTPUT_PATH.toString(),
-            getIndexDir(root, collection, minTime, maxTime) + File.separator + SEGMENT_CREATION.getName());
-        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_START_TIME.toString(),
-            String.valueOf(minTime.getMillis()));
-        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_END_TIME.toString(),
-            String.valueOf(maxTime.getMillis()));
-
-        String schedule = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_FLOW_SCHEDULE.getName());
-        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_SCHEDULE.toString(), schedule);
-        return config;
-      }
-    },
-    SEGMENT_PUSH {
-      @Override
-      Class<?> getKlazz() {
-        return SegmentPushPhase.class;
-      }
-
-      @Override
-      String getDescription() {
-        return "Pushes pinot segments to pinot controller";
-      }
-
-      @Override
-      Properties getJobProperties(Properties inputConfig, String root, String collection,
-           DateTime minTime, DateTime maxTime, String inputPaths)
-              throws Exception {
-        Properties config = new Properties();
-
-        config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_INPUT_PATH.toString(),
-            getIndexDir(root, collection, minTime, maxTime) + File.separator + SEGMENT_CREATION.getName());
-        config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_CONTROLLER_HOSTS.toString(),
-            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_HOSTS.getName()));
-        config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_CONTROLLER_PORT.toString(),
-            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_PORT.getName()));
-        return config;
-      }
-    };
-
-    abstract Class<?> getKlazz();
-
-    abstract String getDescription();
-
-    abstract Properties getJobProperties(Properties inputConfig, String root, String collection,
-        DateTime minTime, DateTime maxTime, String inputPaths) throws Exception;
-
-    String getName() {
-      return this.name().toLowerCase();
-    }
-
-    String getIndexDir(String root, String collection, DateTime minTime,
-        DateTime maxTime) throws IOException {
-      return getCollectionDir(root, collection) + File.separator
-          + "data_" + ThirdEyeConstants.DATE_TIME_FORMATTER.print(minTime) + "_"
-          + ThirdEyeConstants.DATE_TIME_FORMATTER.print(maxTime);
-    }
-
-  }
-
-  private static void usage() {
-    System.err.println(USAGE);
-    for (PhaseSpec phase : PhaseSpec.values()) {
-      System.err.printf("%-30s : %s\n", phase.getName(), phase.getDescription());
-    }
-  }
-
-  private static String getAndCheck(String name, Properties properties) {
-    String value = properties.getProperty(name);
-    if (value == null) {
-      throw new IllegalArgumentException("Must provide " + name);
-    }
-    return value;
-  }
-
-
-  private static String getCollectionDir(String root, String collection) {
-    return root == null ? collection : root + File.separator + collection;
-  }
-
-  private void setMapreduceConfig(Configuration configuration) {
-    String mapreduceConfig =
-        inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_MR_CONF.getName());
-    if (mapreduceConfig != null && !mapreduceConfig.isEmpty()) {
-      String[] options = mapreduceConfig.split(",");
-      for (String option : options) {
-        String[] configs = option.split("=", 2);
-        if (configs.length == 2) {
-          LOGGER.info("Setting job configuration {} to {}", configs[0], configs[1]);
-          configuration.set(configs[0], configs[1]);
-        }
-      }
-    }
-  }
-
-  @SuppressWarnings("unchecked")
-  public void run() throws Exception {
-    LOGGER.info("Input config:{}", inputConfig);
-    PhaseSpec phaseSpec;
-    try {
-      phaseSpec = PhaseSpec.valueOf(phaseName.toUpperCase());
-    } catch (Exception e) {
-      usage();
-      throw e;
-    }
-
-    if (PhaseSpec.TRANSFORM.equals(phaseSpec)) {
-      TransformPhaseJob job = new TransformPhaseJob("Transform Job", inputConfig);
-      job.run();
-      return;
-
-    } else if (PhaseSpec.JOIN.equals(phaseSpec)) {
-      JoinPhaseJob job = new JoinPhaseJob("Join Job", inputConfig);
-      job.run();
-      return;
-
-    } else if (PhaseSpec.WAIT.equals(phaseSpec)) {
-      WaitPhaseJob job = new WaitPhaseJob("Wait for inputs", inputConfig);
-      job.run();
-      return;
-    }
-
-    // Get root, collection, input paths
-    String root = getAndCheck(ThirdEyeJobProperties.THIRDEYE_ROOT.getName(), inputConfig);
-    String collection =
-        getAndCheck(ThirdEyeJobProperties.THIRDEYE_COLLECTION.getName(), inputConfig);
-    String inputPaths = getAndCheck(ThirdEyeJobProperties.INPUT_PATHS.getName(), inputConfig);
-
-    // Get min / max time
-    DateTime minTime;
-    DateTime maxTime;
-
-    String minTimeProp = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_TIME_MIN.getName());
-    String maxTimeProp = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_TIME_MAX.getName());
-
-    minTime = ISODateTimeFormat.dateTimeParser().parseDateTime(minTimeProp);
-    maxTime = ISODateTimeFormat.dateTimeParser().parseDateTime(maxTimeProp);
-
-    Properties jobProperties = phaseSpec.getJobProperties(inputConfig, root, collection,
-        minTime, maxTime, inputPaths);
-    for (Object key : inputConfig.keySet()) {
-      jobProperties.setProperty(key.toString(), inputConfig.getProperty(key.toString()));
-    }
-
-    // Instantiate the job
-    Constructor<Configured> constructor = (Constructor<Configured>) phaseSpec.getKlazz()
-        .getConstructor(String.class, Properties.class);
-    Configured instance = constructor.newInstance(phaseSpec.getName(), jobProperties);
-    setMapreduceConfig(instance.getConf());
-
-    // Run the job
-    Method runMethod = instance.getClass().getMethod("run");
-    Job job = (Job) runMethod.invoke(instance);
-    if (job != null) {
-      JobStatus status = job.getStatus();
-      if (status.getState() != JobStatus.State.SUCCEEDED) {
-        throw new RuntimeException(
-            "Job " + job.getJobName() + " failed to execute: Ran with config:" + jobProperties);
-      }
-    }
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 2) {
-      usage();
-      System.exit(1);
-    }
-
-    String phaseName = args[0];
-    Properties config = new Properties();
-    config.load(new FileInputStream(args[1]));
-    new ThirdEyeJob(phaseName, config).run();
-  }
-}
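
For context on the file removed above (presumably relocated as part of the package rename; the truncated diff does not show the new location): the entry point is driven by a phase name plus a properties file, and each PhaseSpec is instantiated reflectively and run. A minimal sketch of driving it programmatically, assuming the class is re-created under the new org.apache.pinot.thirdeye.hadoop package and that a job.properties file exists locally:

    import java.io.FileInputStream;
    import java.util.Properties;

    // Assumed post-rename location of the class shown in the diff above.
    import org.apache.pinot.thirdeye.hadoop.ThirdEyeJob;

    public class RunThirdEyeJobSketch {
      public static void main(String[] args) throws Exception {
        Properties config = new Properties();
        config.load(new FileInputStream("job.properties"));   // hypothetical path
        // "segment_creation" resolves to PhaseSpec.SEGMENT_CREATION via valueOf(phaseName.toUpperCase()).
        new ThirdEyeJob("segment_creation", config).run();
      }
    }
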
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJobProperties.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJobProperties.java
deleted file mode 100644
index a002c4ac80..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/ThirdEyeJobProperties.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop;
-
-public enum ThirdEyeJobProperties {
-  THIRDEYE_FLOW_SCHEDULE("thirdeye.flow.schedule"), // HOURLY, DAILY
-  THIRDEYE_PHASE("thirdeye.phase"), // segment_creation, segment_push
-  THIRDEYE_ROOT("thirdeye.root"),
-  THIRDEYE_COLLECTION("thirdeye.collection"),
-  THIRDEYE_TIME_MIN("thirdeye.time.min"), // YYYY-mm-ddThh
-  THIRDEYE_TIME_MAX("thirdeye.time.max"),
-  INPUT_PATHS("input.paths"),
-  THIRDEYE_MR_CONF("thirdeye.mr.conf"),
-  THIRDEYE_PINOT_CONTROLLER_HOSTS("thirdeye.pinot.controller.hosts"),
-  THIRDEYE_PINOT_CONTROLLER_PORT("thirdeye.pinot.controller.port"),
-  THIRDEYE_BACKFILL_START_TIME("thirdeye.backfill.start.time"),
-  THIRDEYE_BACKFILL_END_TIME("thirdeye.backfill.end.time"),
-  THIRDEYE_NUM_REDUCERS("thirdeye.num.reducers");
-
-  private final String propertyName;
-
-  ThirdEyeJobProperties(String propertyName) {
-    this.propertyName = propertyName;
-  }
-
-  public String getName() {
-    return propertyName;
-  }
-}
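
The enum above defines the property keys read by ThirdEyeJob; a minimal sketch of assembling a configuration with those keys (all values below are placeholders, not taken from the PR):

    import java.util.Properties;

    public class ThirdEyeJobConfigSketch {
      public static Properties exampleConfig() {
        Properties p = new Properties();
        p.setProperty("thirdeye.phase", "segment_creation");       // THIRDEYE_PHASE
        p.setProperty("thirdeye.root", "/user/thirdeye/root");     // THIRDEYE_ROOT (placeholder path)
        p.setProperty("thirdeye.collection", "exampleCollection"); // THIRDEYE_COLLECTION (placeholder)
        p.setProperty("input.paths", "/user/thirdeye/input");      // INPUT_PATHS (placeholder path)
        p.setProperty("thirdeye.time.min", "2018-01-01T00");       // THIRDEYE_TIME_MIN, YYYY-mm-ddThh
        p.setProperty("thirdeye.time.max", "2018-01-02T00");       // THIRDEYE_TIME_MAX
        p.setProperty("thirdeye.flow.schedule", "DAILY");          // THIRDEYE_FLOW_SCHEDULE: HOURLY or DAILY
        return p;
      }
    }
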
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java
deleted file mode 100644
index aa92df9127..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.TimeSpec;
-
-/**
- * This class contains the config needed by aggregation
- * and the methods to obtain the config from the ThirdEyeConfig
- */
-public class AggregationPhaseConfig {
-  private List<String> dimensionNames;
-  private List<DimensionType> dimensionTypes;
-  private List<String> metricNames;
-  private List<MetricType> metricTypes;
-  private TimeSpec time;
-  private TimeSpec inputTime;
-
-  public AggregationPhaseConfig() {
-
-  }
-
-  public AggregationPhaseConfig(List<String> dimensionNames, List<String> metricNames,
-      List<DimensionType> dimensionTypes, List<MetricType> metricTypes, TimeSpec time, TimeSpec inputTime) {
-    super();
-    this.dimensionNames = dimensionNames;
-    this.dimensionTypes = dimensionTypes;
-    this.metricNames = metricNames;
-    this.metricTypes = metricTypes;
-    this.time = time;
-    this.inputTime = inputTime;
-  }
-
-  public List<String> getDimensionNames() {
-    return dimensionNames;
-  }
-
-  public List<DimensionType> getDimensionTypes() {
-    return dimensionTypes;
-  }
-
-  public List<String> getMetricNames() {
-    return metricNames;
-  }
-
-  public List<MetricType> getMetricTypes() {
-    return metricTypes;
-  }
-
-  public TimeSpec getTime() {
-    return time;
-  }
-
-  public TimeSpec getInputTime() {
-    return inputTime;
-  }
-
-  public static AggregationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
-
-    // metrics
-    List<String> metricNames = new ArrayList<>(config.getMetrics().size());
-    List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
-    for (MetricSpec spec : config.getMetrics()) {
-      metricNames.add(spec.getName());
-      metricTypes.add(spec.getType());
-    }
-
-    // dimensions
-    List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
-    List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
-    for (DimensionSpec spec : config.getDimensions()) {
-      dimensionNames.add(spec.getName());
-      dimensionTypes.add(spec.getDimensionType());
-    }
-
-    // time
-    TimeSpec time = config.getTime();
-
-    // input time
-    TimeSpec inputTime = config.getInputTime();
-    if (inputTime == null) {
-      throw new IllegalStateException("Must provide input time configs for aggregation job");
-    }
-
-    return new AggregationPhaseConfig(dimensionNames, metricNames, dimensionTypes, metricTypes, time, inputTime);
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java
deleted file mode 100644
index 58de4e7384..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-/**
- * This class contains the properties to be set for aggregation phase
- */
-public enum AggregationPhaseConstants {
-
-  AGG_PHASE_INPUT_PATH("aggregation.phase.input.path"),
-  AGG_PHASE_AVRO_SCHEMA("aggregation.phase.avro.schema"),
-  AGG_PHASE_OUTPUT_PATH("aggregation.phase.output.path"),
-  AGG_PHASE_THIRDEYE_CONFIG("aggregation.phase.thirdeye.config");
-
-  String name;
-
-  AggregationPhaseConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseJob.java
deleted file mode 100644
index 302d19b612..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseJob.java
+++ /dev/null
@@ -1,360 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroJob;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyOutputFormat;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.linkedin.thirdeye.hadoop.ThirdEyeJobProperties;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.config.TimeGranularity;
-import com.linkedin.thirdeye.hadoop.config.TimeSpec;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAggregateMetricUtils;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-import static com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseConstants.*;
-
-/**
- * Buckets input avro data according to granularity specified in config and aggregates metrics
- * Mapper:
- * Converts time column into bucket granularity
- * Reducer:
- * Aggregates all records with same dimensions in one time bucket
- */
-public class AggregationPhaseJob extends Configured {
-  private static final Logger LOGGER = LoggerFactory.getLogger(AggregationPhaseJob.class);
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-  private String name;
-  private Properties props;
-
-  public AggregationPhaseJob(String name, Properties props) {
-    super(new Configuration());
-    this.name = name;
-    this.props = props;
-  }
-
-  public static class AggregationMapper extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
-
-    private ThirdEyeConfig thirdeyeConfig;
-    private AggregationPhaseConfig config;
-    private List<String> dimensionNames;
-    private List<DimensionType> dimensionTypes;
-    private List<String> metricNames;
-    List<MetricType> metricTypes;
-    private int numMetrics;
-    private String timeColumnName;
-    private TimeGranularity inputGranularity;
-    private TimeGranularity aggregateGranularity;
-    private BytesWritable keyWritable;
-    private BytesWritable valWritable;
-    private int numRecords;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-      LOGGER.info("AggregationPhaseJob.AggregationPhaseMapper.setup()");
-      Configuration configuration = context.getConfiguration();
-
-      thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(AGG_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-      config = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-      dimensionNames = config.getDimensionNames();
-      dimensionTypes = config.getDimensionTypes();
-      metricNames = config.getMetricNames();
-      numMetrics = metricNames.size();
-      metricTypes = config.getMetricTypes();
-      timeColumnName = config.getTime().getColumnName();
-      inputGranularity = config.getInputTime().getTimeGranularity();
-      aggregateGranularity = config.getTime().getTimeGranularity();
-      keyWritable = new BytesWritable();
-      valWritable = new BytesWritable();
-      numRecords = 0;
-    }
-
-    @Override
-    public void map(AvroKey<GenericRecord> record, NullWritable value, Context context) throws IOException, InterruptedException {
-
-      // input record
-      GenericRecord inputRecord = record.datum();
-
-      // dimensions
-      List<Object> dimensions = new ArrayList<>();
-      for (String dimension : dimensionNames) {
-        Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimension);
-        dimensions.add(dimensionValue);
-      }
-
-      // metrics
-      Number[] metrics = new Number[numMetrics];
-      for (int i = 0; i < numMetrics; i++) {
-        Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricNames.get(i), metricTypes.get(i));
-        metrics[i] = metricValue;
-      }
-
-      // time
-      long timeValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName).longValue();
-      long inputTimeMillis = inputGranularity.toMillis(timeValue);
-      long bucketTime = aggregateGranularity.convertToUnit(inputTimeMillis);
-
-      AggregationPhaseMapOutputKey keyWrapper = new AggregationPhaseMapOutputKey(bucketTime, dimensions, dimensionTypes);
-      byte[] keyBytes = keyWrapper.toBytes();
-      keyWritable.set(keyBytes, 0, keyBytes.length);
-
-      AggregationPhaseMapOutputValue valWrapper = new AggregationPhaseMapOutputValue(metrics, metricTypes);
-      byte[] valBytes = valWrapper.toBytes();
-      valWritable.set(valBytes, 0, valBytes.length);
-
-      numRecords ++;
-      context.write(keyWritable, valWritable);
-    }
-
-    @Override
-    public void cleanup(Context context) throws IOException, InterruptedException {
-      context.getCounter(AggregationCounter.NUMBER_OF_RECORDS).increment(numRecords);
-    }
-  }
-
-  public static class AggregationReducer
-      extends Reducer<BytesWritable, BytesWritable, AvroKey<GenericRecord>, NullWritable> {
-
-    private Schema avroSchema;
-    private ThirdEyeConfig thirdeyeConfig;
-    private AggregationPhaseConfig config;
-    private List<String> dimensionsNames;
-    private List<DimensionType> dimensionTypes;
-    private List<String> metricNames;
-    List<MetricType> metricTypes;
-    private int numMetrics;
-    private TimeSpec time;
-    private int numRecords;
-    private Number[] metricSums;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-      LOGGER.info("AggregationPhaseJob.AggregationPhaseReducer.setup()");
-      Configuration configuration = context.getConfiguration();
-
-      thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(AGG_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-      config = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-      dimensionsNames = config.getDimensionNames();
-      dimensionTypes = config.getDimensionTypes();
-      metricNames = config.getMetricNames();
-      numMetrics = metricNames.size();
-      metricTypes = config.getMetricTypes();
-      time = config.getTime();
-      avroSchema = new Schema.Parser().parse(configuration.get(AGG_PHASE_AVRO_SCHEMA.toString()));
-      numRecords = 0;
-      metricSums = new Number[numMetrics];
-      Arrays.fill(metricSums, 0);
-    }
-
-    @Override
-    public void reduce(BytesWritable aggregationKey, Iterable<BytesWritable> values,
-        Context context) throws IOException, InterruptedException {
-
-      // output record
-      GenericRecord outputRecord = new Record(avroSchema);
-
-      AggregationPhaseMapOutputKey keyWrapper = AggregationPhaseMapOutputKey.fromBytes(aggregationKey.getBytes(), dimensionTypes);
-
-      // time
-      long timeValue = keyWrapper.getTime();
-      outputRecord.put(time.getColumnName(), timeValue);
-
-      // dimensions
-      List<Object> dimensionValues = keyWrapper.getDimensionValues();
-      for (int i = 0; i < dimensionsNames.size(); i++) {
-        String dimensionName = dimensionsNames.get(i);
-        Object dimensionValue = dimensionValues.get(i);
-        outputRecord.put(dimensionName, dimensionValue);
-      }
-
-      // aggregate metrics
-      Number[] aggMetricValues = new Number[numMetrics];
-      Arrays.fill(aggMetricValues, 0);
-      for (BytesWritable value : values) {
-        Number[] metricValues = AggregationPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes).getMetricValues();
-        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
-      }
-      ThirdeyeAggregateMetricUtils.aggregate(metricTypes, metricSums, aggMetricValues);
-
-      // metrics
-      for (int i = 0; i < numMetrics; i++) {
-        String metricName = metricNames.get(i);
-        Number metricValue = aggMetricValues[i];
-        outputRecord.put(metricName, metricValue);
-      }
-
-      numRecords ++;
-      AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
-      context.write(outputKey, NullWritable.get());
-    }
-
-    @Override
-    public void cleanup(Context context) throws IOException, InterruptedException {
-      context.getCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED).increment(numRecords);
-      for (int i = 0; i < numMetrics; i++) {
-        context.getCounter(thirdeyeConfig.getCollection(), metricNames.get(i)).increment(metricSums[i].longValue());
-      }
-    }
-  }
-
-  public Job run() throws Exception {
-    Job job = Job.getInstance(getConf());
-    job.setJobName(name);
-    job.setJarByClass(AggregationPhaseJob.class);
-
-    FileSystem fs = FileSystem.get(getConf());
-    Configuration configuration = job.getConfiguration();
-
-    // Properties
-    LOGGER.info("Properties {}", props);
-
-     // Input Path
-    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
-    LOGGER.info("Input path dir: " + inputPathDir);
-    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
-      LOGGER.info("Adding input:" + inputPath);
-      Path input = new Path(inputPath);
-      FileInputFormat.addInputPath(job, input);
-    }
-
-    // Output path
-    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
-    LOGGER.info("Output path dir: " + outputPath.toString());
-    if (fs.exists(outputPath)) {
-      fs.delete(outputPath, true);
-    }
-    FileOutputFormat.setOutputPath(job, outputPath);
-
-    // Schema
-    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
-    LOGGER.info("Schema : {}", avroSchema.toString(true));
-    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());
-
-    // ThirdEyeConfig
-    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
-    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
-    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
-    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    // Map config
-    job.setMapperClass(AggregationMapper.class);
-    job.setInputFormatClass(AvroKeyInputFormat.class);
-    job.setMapOutputKeyClass(BytesWritable.class);
-    job.setMapOutputValueClass(BytesWritable.class);
-
-    // Reduce config
-    job.setReducerClass(AggregationReducer.class);
-    job.setOutputKeyClass(AvroKey.class);
-    job.setOutputValueClass(NullWritable.class);
-    AvroJob.setOutputKeySchema(job, avroSchema);
-    job.setOutputFormatClass(AvroKeyOutputFormat.class);
-    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
-    LOGGER.info("Num Reducers : {}", numReducers);
-    if (StringUtils.isNotBlank(numReducers)) {
-      job.setNumReduceTasks(Integer.valueOf(numReducers));
-      LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
-    }
-
-    job.waitForCompletion(true);
-
-    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
-    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
-    if (counter.getValue() == 0) {
-      throw new IllegalStateException("No input records in " + inputPathDir);
-    }
-    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
-    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
-
-    for (String metric : thirdeyeConfig.getMetricNames()) {
-      counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
-      LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
-    }
-
-    return job;
-  }
-
-  private String getAndSetConfiguration(Configuration configuration,
-      AggregationPhaseConstants constant) {
-    String value = getAndCheck(constant.toString());
-    configuration.set(constant.toString(), value);
-    return value;
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " required property");
-    }
-    return propValue;
-  }
-
-  public static enum AggregationCounter {
-    NUMBER_OF_RECORDS,
-    NUMBER_OF_RECORDS_FLATTENED
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 1) {
-      throw new IllegalArgumentException("usage: config.properties");
-    }
-
-    Properties props = new Properties();
-    props.load(new FileInputStream(args[0]));
-
-    AggregationPhaseJob job = new AggregationPhaseJob("aggregate_avro_job", props);
-    job.run();
-  }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java
deleted file mode 100644
index ecc0bdd4e7..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-
-/**
- * Wrapper for the key generated by mapper in Aggregation
- */
-public class AggregationPhaseMapOutputKey {
-
-  private long time;
-  private List<Object> dimensionValues;
-  private List<DimensionType> dimensionTypes;
-
-  public AggregationPhaseMapOutputKey(long time, List<Object> dimensionValues, List<DimensionType> dimensionTypes) {
-    this.time = time;
-    this.dimensionValues = dimensionValues;
-    this.dimensionTypes = dimensionTypes;
-  }
-
-  public long getTime() {
-    return time;
-  }
-
-  public List<Object> getDimensionValues() {
-    return dimensionValues;
-  }
-
-  public List<DimensionType> getDimensionTypes() {
-    return dimensionTypes;
-  }
-
-  /**
-   * Converts AggregationPhaseMapOutputKey to bytes buffer
-   * @return
-   * @throws IOException
-   */
-  public byte[] toBytes() throws IOException {
-
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    DataOutputStream dos = new DataOutputStream(baos);
-
-    // time
-    dos.writeLong(time);
-
-    // dimensions size
-    dos.writeInt(dimensionValues.size());
-    // dimension values
-    for (int i = 0; i < dimensionValues.size(); i++) {
-      Object dimensionValue = dimensionValues.get(i);
-      DimensionType dimensionType = dimensionTypes.get(i);
-      DimensionType.writeDimensionValueToOutputStream(dos, dimensionValue, dimensionType);
-    }
-
-    baos.close();
-    dos.close();
-    return baos.toByteArray();
-  }
-
-  /**
-   * Constructs AggregationPhaseMapOutputKey from bytes buffer
-   * @param buffer
-   * @param dimensionTypes
-   * @return
-   * @throws IOException
-   */
-  public static AggregationPhaseMapOutputKey fromBytes(byte[] buffer, List<DimensionType> dimensionTypes) throws IOException {
-    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
-
-    // time
-    long time = dis.readLong();
-
-    // dimensions size
-    int size = dis.readInt();
-
-    // dimension value
-    List<Object> dimensionValues = new ArrayList<>();
-    for (int i = 0; i < size; i++) {
-      DimensionType dimensionType = dimensionTypes.get(i);
-      Object dimensionValue = DimensionType.readDimensionValueFromDataInputStream(dis, dimensionType);
-      dimensionValues.add(dimensionValue);
-    }
-
-    AggregationPhaseMapOutputKey wrapper;
-    wrapper = new AggregationPhaseMapOutputKey(time, dimensionValues, dimensionTypes);
-    return wrapper;
-  }
-
-}
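
The key wrapper above defines the mapper's binary key format: a long time bucket, followed by the dimension count and the typed dimension values. A minimal round-trip sketch, assuming the class keeps this API under the renamed package and using an empty dimension list to avoid depending on specific DimensionType constants:

    import java.util.Collections;
    import java.util.List;

    // Assumed post-rename locations of the classes shown in the diff above.
    import org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseMapOutputKey;
    import org.apache.pinot.thirdeye.hadoop.config.DimensionType;

    public class MapOutputKeyRoundTripSketch {
      public static void main(String[] args) throws Exception {
        List<DimensionType> noTypes = Collections.emptyList();
        AggregationPhaseMapOutputKey key =
            new AggregationPhaseMapOutputKey(1514764800000L, Collections.emptyList(), noTypes);
        byte[] bytes = key.toBytes();                     // writes the long time, then int 0 (no dimensions)
        AggregationPhaseMapOutputKey decoded =
            AggregationPhaseMapOutputKey.fromBytes(bytes, noTypes);
        System.out.println(decoded.getTime());            // prints 1514764800000
      }
    }
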
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java
deleted file mode 100644
index 2493907a32..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-
-/**
- * Wrapper for value generated by mapper in AggregationPhase
- */
-public class AggregationPhaseMapOutputValue {
-
-  private Number[] metricValues;
-  private List<MetricType> metricTypes;
-
-  public AggregationPhaseMapOutputValue(Number[] metricValues, List<MetricType> metricTypes) {
-    this.metricValues = metricValues;
-    this.metricTypes = metricTypes;
-  }
-
-  public Number[] getMetricValues() {
-    return metricValues;
-  }
-
-  /**
-   * Converts a AggregationPhaseMapOutputvalue to a bytes buffer
-   * @return
-   * @throws IOException
-   */
-  public byte[] toBytes() throws IOException {
-
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    DataOutputStream dos = new DataOutputStream(baos);
-
-    // metric values
-    dos.writeInt(metricValues.length);
-    for (int i = 0; i < metricValues.length; i++) {
-      Number number = metricValues[i];
-      MetricType metricType = metricTypes.get(i);
-      MetricType.writeMetricValueToDataOutputStream(dos, number, metricType);
-    }
-
-    baos.close();
-    dos.close();
-    return baos.toByteArray();
-  }
-
-  /**
-   * Constructs an AggregationPhaseMapOutputValue from a bytes buffer
-   * @param buffer
-   * @param metricTypes
-   * @return
-   * @throws IOException
-   */
-  public static AggregationPhaseMapOutputValue fromBytes(byte[] buffer, List<MetricType> metricTypes) throws IOException {
-    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
-
-    // metric values
-    int length = dis.readInt();
-    Number[] metricValues = new Number[length];
-
-    for (int i = 0 ; i < length; i++) {
-      MetricType metricType = metricTypes.get(i);
-      Number metricValue = MetricType.readMetricValueFromDataInputStream(dis, metricType);
-      metricValues[i] = metricValue;
-    }
-
-    AggregationPhaseMapOutputValue wrapper;
-    wrapper = new AggregationPhaseMapOutputValue(metricValues, metricTypes);
-    return wrapper;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillControllerAPIs.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillControllerAPIs.java
deleted file mode 100644
index 469a830d47..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillControllerAPIs.java
+++ /dev/null
@@ -1,251 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.backfill;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.compress.archivers.ArchiveException;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.HttpClient;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.DefaultHttpClient;
-import org.apache.http.util.EntityUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.io.Files;
-import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-
-/**
- * Contains APIs which are used for backfilling the pinot segments with star tree index
- */
-public class BackfillControllerAPIs {
-
-  private static Logger LOGGER = LoggerFactory.getLogger(BackfillControllerAPIs.class);
-  private HttpHost controllerHttpHost;
-  private String tableName;
-
-  private static String SEGMENTS_ENDPOINT = "segments/";
-  private static String TABLES_ENDPOINT = "tables/";
-  private static String METADATA_ENDPOINT = "metadata";
-  private static String UTF_8 = "UTF-8";
-
-  private static String SEGMENT_NAME = "segment.name";
-  private static String SEGMENT_TABLE_NAME = "segment.table.name";
-  private static String SEGMENT_END_TIME = "segment.end.time";
-  private static String SEGMENT_START_TIME = "segment.start.time";
-  private static String SEGMENT_TIME_UNIT = "segment.time.unit";
-
-  BackfillControllerAPIs(String controllerHost, int controllerPort, String tableName) {
-    this.tableName = tableName;
-    LOGGER.info("Connecting to {} {} table {}", controllerHost, controllerPort, tableName);
-    controllerHttpHost = new HttpHost(controllerHost, controllerPort);
-  }
-
-  /**
-   * Downloads a segment from the controller, given the table name and segment name
-   * @param segmentName
-   * @param hdfsSegmentPath
-   * @throws IOException
-   * @throws ArchiveException
-   */
-  public void downloadSegment(String segmentName, Path hdfsSegmentPath)
-      throws IOException, ArchiveException {
-
-    FileSystem fs = FileSystem.get(new Configuration());
-    HttpClient controllerClient = new DefaultHttpClient();
-    HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tableName, UTF_8)
-        + "/" + URLEncoder.encode(segmentName, UTF_8));
-    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
-    try {
-      if (res.getStatusLine().getStatusCode() != 200) {
-        throw new IllegalStateException(res.getStatusLine().toString());
-      }
-      LOGGER.info("Fetching segment {}", segmentName);
-      InputStream content = res.getEntity().getContent();
-
-      File tempDir = new File(Files.createTempDir(), "thirdeye_temp");
-      tempDir.mkdir();
-      LOGGER.info("Creating temporary dir for staging segments {}", tempDir);
-      File tempSegmentDir = new File(tempDir, segmentName);
-      File tempSegmentTar = new File(tempDir, segmentName + ThirdEyeConstants.TAR_SUFFIX);
-
-      LOGGER.info("Downloading {} to {}", segmentName, tempSegmentTar);
-      OutputStream out = new FileOutputStream(tempSegmentTar);
-      IOUtils.copy(content, out);
-      if (!tempSegmentTar.exists()) {
-        throw new IllegalStateException("Download of " + segmentName + " unsuccessful");
-      }
-
-      LOGGER.info("Extracting segment {} to {}", tempSegmentTar, tempDir);
-      TarGzCompressionUtils.unTar(tempSegmentTar, tempDir);
-      File[] files = tempDir.listFiles(new FilenameFilter() {
-
-        @Override
-        public boolean accept(File dir, String name) {
-          return !name.endsWith(ThirdEyeConstants.TAR_SUFFIX) && new File(dir, name).isDirectory();
-        }
-      });
-      if (files.length == 0) {
-        throw new IllegalStateException("Failed to extract " + tempSegmentTar + " to " + tempDir);
-      } else if (!files[0].getName().equals(tempSegmentDir.getName())){
-        LOGGER.info("Moving extracted segment to the segment dir {}", tempSegmentDir);
-        FileUtils.moveDirectory(files[0], tempSegmentDir);
-      }
-      if (!tempSegmentDir.exists()) {
-        throw new IllegalStateException("Failed to move " + files[0] + " to " + tempSegmentDir);
-      }
-
-      LOGGER.info("Copying segment from {} to hdfs {}", tempSegmentDir, hdfsSegmentPath);
-      fs.copyFromLocalFile(new Path(tempSegmentDir.toString()), hdfsSegmentPath);
-      Path hdfsSegmentDir = new Path(hdfsSegmentPath, segmentName);
-      if (!fs.exists(hdfsSegmentDir)) {
-        throw new IllegalStateException("Failed to copy segment " + segmentName + " from local path " + tempSegmentDir
-            + " to hdfs path " + hdfsSegmentPath);
-      }
-    } finally {
-      if (res.getEntity() != null) {
-        EntityUtils.consume(res.getEntity());
-      }
-    }
-    LOGGER.info("Successfully downloaded segment {} to {}", segmentName, hdfsSegmentPath);
-  }
-
-  /**
-   * Given a time range and list of all segments for a table, returns all segments which are in the time range
-   * @param tableName
-   * @param allSegments
-   * @param startTime
-   * @param endTime
-   * @return
-   * @throws Exception
-   */
-  public List<String> findSegmentsInRange(String tableName, List<String> allSegments, long startTime, long endTime)
-      throws Exception {
-    List<String> segmentsInRange = new ArrayList<>();
-    for (String segmentName : allSegments) {
-      Map<String, String> metadata = getSegmentMetadata(tableName, segmentName);
-      long segmentStartTime = Long.valueOf(metadata.get(SEGMENT_START_TIME));
-      long segmentEndTime = Long.valueOf(metadata.get(SEGMENT_END_TIME));
-      String segmentTableName = metadata.get(SEGMENT_TABLE_NAME);
-
-      // TODO:
-      // Using time value directly for now, as we only have time unit and not time size in metadata
-      // Once we have time size in metadata, we can accept the time in millis and then convert time from metadata accordingly
-      if (segmentTableName.equals(tableName) && ((segmentStartTime >= startTime && segmentStartTime <= endTime)
-          || (segmentEndTime >= startTime && segmentEndTime <= endTime))) {
-        LOGGER.info("Segment name : {}, Segment start : {}, Segment end : {}, Segment table : {}",
-            segmentName, segmentStartTime, segmentEndTime, segmentTableName);
-        segmentsInRange.add(segmentName);
-      }
-    }
-    return segmentsInRange;
-  }
-
-  /**
-   * Fetches the list of all segment names for a table
-   * @param tableName
-   * @return
-   * @throws IOException
-   */
-  public List<String> getAllSegments(String tableName) throws IOException {
-    List<String> allSegments = new ArrayList<>();
-
-    HttpClient controllerClient = new DefaultHttpClient();
-    HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tableName, UTF_8));
-    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
-    try {
-      if (res.getStatusLine().getStatusCode() != 200) {
-        throw new IllegalStateException(res.getStatusLine().toString());
-      }
-      InputStream content = res.getEntity().getContent();
-      String response = IOUtils.toString(content);
-      List<String> allSegmentsPaths = getSegmentsFromResponse(response);
-      for (String segment : allSegmentsPaths) {
-        allSegments.add(segment.substring(segment.lastIndexOf("/") + 1));
-      }
-      LOGGER.info("All segments : {}", allSegments);
-    } finally {
-      if (res.getEntity() != null) {
-        EntityUtils.consume(res.getEntity());
-      }
-    }
-    return allSegments;
-  }
-
-  /**
-   * Returns the metadata of a segment, given the segment name and table name
-   * @param tableName - table where segment resides
-   * @param segmentName - name of the segment
-   * @return
-   * @throws IOException
-   */
-  public Map<String, String> getSegmentMetadata(String tableName, String segmentName) throws IOException {
-    Map<String, String> metadata = null;
-    HttpClient controllerClient = new DefaultHttpClient();
-    HttpGet req = new HttpGet(TABLES_ENDPOINT + URLEncoder.encode(tableName, UTF_8)
-        + "/" + SEGMENTS_ENDPOINT + URLEncoder.encode(segmentName, UTF_8) + "/" + METADATA_ENDPOINT);
-    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
-    try {
-      if (res.getStatusLine().getStatusCode() != 200) {
-        throw new IllegalStateException(res.getStatusLine().toString());
-      }
-      InputStream content = res.getEntity().getContent();
-      String metadataResponse = IOUtils.toString(content);
-      metadata = getMetadataFromResponse(metadataResponse);
-    } finally {
-      if (res.getEntity() != null) {
-        EntityUtils.consume(res.getEntity());
-      }
-    }
-    return metadata;
-  }
-
-  private List<String> getSegmentsFromResponse(String response) {
-    String[] allSegments = response.replaceAll("\\[|\\]|\"", "").split(",");
-    return Arrays.asList(allSegments);
-  }
-
-  private Map<String, String> getMetadataFromResponse(String response) {
-    Map<String, String> metadata = new HashMap<>();
-    String cleanUpResponse = response.replaceAll("\\[|\\]|\"|\\{|\\}|\\\\", "");
-    String[] allProperties = cleanUpResponse.replace("state:", "").split(",");
-    for (String property : allProperties) {
-      String[] tokens = property.split(":", 2);
-      metadata.put(tokens[0], tokens[1]);
-    }
-    return metadata;
-  }
-
-}
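A hedged sketch of how these controller APIs are driven; it mirrors the flow in BackfillPhaseJob further down in this diff. Host, port, table name, epoch time range and HDFS staging path are placeholders, and the caller has to live in the same package because the constructor is package-private:

    package com.linkedin.thirdeye.hadoop.backfill;

    import java.util.List;
    import org.apache.hadoop.fs.Path;

    public class BackfillDownloadExample {
      public static void main(String[] args) throws Exception {
        BackfillControllerAPIs apis =
            new BackfillControllerAPIs("localhost", 9000, "myTable_OFFLINE");

        // list every segment of the table, keep those overlapping the time range,
        // then stage each one on HDFS for the rest of the pipeline
        List<String> allSegments = apis.getAllSegments("myTable_OFFLINE");
        List<String> segmentsToDownload =
            apis.findSegmentsInRange("myTable_OFFLINE", allSegments, 1468800000L, 1469000000L);
        for (String segmentName : segmentsToDownload) {
          apis.downloadSegment(segmentName, new Path("/user/thirdeye/backfill/download"));
        }
      }
    }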
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseConstants.java
deleted file mode 100644
index d9c4609041..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseConstants.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.backfill;
-
-/**
- * This class contains the properties to be set for backfill phase
- */
-public enum BackfillPhaseConstants {
-
-  BACKFILL_PHASE_CONTROLLER_HOST("backfill.phase.controller.host"),
-  BACKFILL_PHASE_CONTROLLER_PORT("backfill.phase.controller.port"),
-  BACKFILL_PHASE_START_TIME("backfill.phase.start.time"),
-  BACKFILL_PHASE_END_TIME("backfill.phase.end.time"),
-  BACKFILL_PHASE_TABLE_NAME("backfill.phase.table.name"),
-  BACKFILL_PHASE_OUTPUT_PATH("backfill.phase.output.path");
-
-  String name;
-
-  BackfillPhaseConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseJob.java
deleted file mode 100644
index b3a762e871..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseJob.java
+++ /dev/null
@@ -1,203 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.backfill;
-
-import static com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseConstants.*;
-
-import java.io.FileInputStream;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-import com.google.common.collect.Lists;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This phase backfills segments which are already present in pinot with star tree and topk information.
- * The pinot segments are downloaded from the table and converted to avro files.
- * These avro files are then passed on to the rest of the thirdeye-hadoop segment generation pipeline.
- */
-public class BackfillPhaseJob extends Configured {
-  private static final Logger LOGGER = LoggerFactory.getLogger(BackfillPhaseJob.class);
-  private static final String DOWNLOAD = "download";
-  private static final String INPUT = "input";
-  private static final String OUTPUT = "output";
-
-  private String name;
-  private Properties props;
-
-  /**
-   * @param name
-   * @param props
-   */
-  public BackfillPhaseJob(String name, Properties props) {
-    super(new Configuration());
-    getConf().set("mapreduce.job.user.classpath.first", "true");
-    this.name = name;
-    this.props = props;
-  }
-
-  public Job run() throws Exception {
-
-    Job job = Job.getInstance(getConf());
-    job.setJarByClass(BackfillPhaseJob.class);
-    job.setJobName(name);
-
-    FileSystem fs = FileSystem.get(getConf());
-    Configuration configuration = job.getConfiguration();
-
-    LOGGER.info("*******************************************************************************");
-    String controllerHost = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_HOST);
-    String controllerPort = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_PORT);
-    LOGGER.info("Controller Host : {} Controller Port : {}", controllerHost, controllerPort);
-    String segmentStartTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_START_TIME);
-    String segmentEndTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_END_TIME);
-    long startTime = Long.valueOf(segmentStartTime);
-    long endTime = Long.valueOf(segmentEndTime);
-    if (Long.valueOf(segmentStartTime) > Long.valueOf(segmentEndTime)) {
-      throw new IllegalStateException("Start time cannot be greater than end time");
-    }
-    String tableName = getAndSetConfiguration(configuration, BACKFILL_PHASE_TABLE_NAME);
-    LOGGER.info("Start time : {} End time : {} Table name : {}", segmentStartTime, segmentEndTime, tableName);
-
-    String outputPath = getAndSetConfiguration(configuration, BACKFILL_PHASE_OUTPUT_PATH);
-    LOGGER.info("Output path : {}", outputPath);
-    Path backfillDir = new Path(outputPath);
-    if (fs.exists(backfillDir)) {
-      LOGGER.warn("Found the output folder deleting it");
-      fs.delete(backfillDir, true);
-    }
-    Path downloadDir = new Path(backfillDir, DOWNLOAD);
-    LOGGER.info("Creating download dir : {}", downloadDir);
-    fs.mkdirs(downloadDir);
-    Path inputDir = new Path(backfillDir, INPUT);
-    LOGGER.info("Creating input dir : {}", inputDir);
-    fs.mkdirs(inputDir);
-    Path outputDir = new Path(backfillDir, OUTPUT);
-    LOGGER.info("Creating output dir : {}", outputDir);
-
-    BackfillControllerAPIs backfillControllerAPIs = new BackfillControllerAPIs(controllerHost,
-        Integer.valueOf(controllerPort), tableName);
-
-    LOGGER.info("Downloading segments in range {} to {}", startTime, endTime);
-    List<String> allSegments = backfillControllerAPIs.getAllSegments(tableName);
-    List<String> segmentsToDownload = backfillControllerAPIs.findSegmentsInRange(tableName, allSegments, startTime, endTime);
-    for (String segmentName : segmentsToDownload) {
-      backfillControllerAPIs.downloadSegment(segmentName, downloadDir);
-    }
-
-    LOGGER.info("Reading downloaded segment input files");
-    List<FileStatus> inputDataFiles = new ArrayList<>();
-    inputDataFiles.addAll(Lists.newArrayList(fs.listStatus(downloadDir)));
-    LOGGER.info("size {}", inputDataFiles.size());
-
-    try {
-      LOGGER.info("Creating input files at {} for segment input files", inputDir);
-      for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
-        FileStatus file = inputDataFiles.get(seqId);
-        String completeFilePath = " " + file.getPath().toString() + " " + seqId;
-        Path newOutPutFile = new Path((inputDir + "/" + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
-        FSDataOutputStream stream = fs.create(newOutPutFile);
-        LOGGER.info("wrote {}", completeFilePath);
-        stream.writeUTF(completeFilePath);
-        stream.flush();
-        stream.close();
-      }
-    } catch (Exception e) {
-      LOGGER.error("Exception while reading input files ", e);
-    }
-
-    job.setMapperClass(BackfillPhaseMapJob.BackfillMapper.class);
-
-    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
-      job.getConfiguration().set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
-    }
-
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
-
-    job.setMapOutputKeyClass(LongWritable.class);
-    job.setMapOutputValueClass(Text.class);
-
-    FileInputFormat.addInputPath(job, inputDir);
-    FileOutputFormat.setOutputPath(job, outputDir);
-
-    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
-    job.setMaxReduceAttempts(1);
-    job.setMaxMapAttempts(0);
-    job.setNumReduceTasks(0);
-
-    for (Object key : props.keySet()) {
-      job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
-    }
-
-    job.waitForCompletion(true);
-    if (!job.isSuccessful()) {
-      throw new RuntimeException("Job failed : " + job);
-    }
-
-    LOGGER.info("Cleanup the working directory");
-    LOGGER.info("Deleting the dir: {}", downloadDir);
-    fs.delete(downloadDir, true);
-    LOGGER.info("Deleting the dir: {}", inputDir);
-    fs.delete(inputDir, true);
-    LOGGER.info("Deleting the dir: {}", outputDir);
-    fs.delete(outputDir, true);
-
-    return job;
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " is a required property");
-    }
-    return propValue;
-  }
-
-  private String getAndSetConfiguration(Configuration configuration, BackfillPhaseConstants constant) {
-    String value = getAndCheck(constant.toString());
-    configuration.set(constant.toString(), value);
-    return value;
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 1) {
-      throw new IllegalArgumentException("usage: config.properties");
-    }
-    Properties props = new Properties();
-    props.load(new FileInputStream(args[0]));
-    BackfillPhaseJob job = new BackfillPhaseJob("backfill_job", props);
-    job.run();
-  }
-
-}
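A minimal sketch of wiring the job up programmatically rather than through a config.properties file. The property keys come from the BackfillPhaseConstants enum above (via a static import, as the job class itself uses); every value and the launcher class name are placeholders:

    import java.util.Properties;
    import com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseJob;
    import static com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseConstants.*;

    public class BackfillJobLauncherExample {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty(BACKFILL_PHASE_CONTROLLER_HOST.toString(), "localhost");
        props.setProperty(BACKFILL_PHASE_CONTROLLER_PORT.toString(), "9000");
        props.setProperty(BACKFILL_PHASE_START_TIME.toString(), "1468800000");
        props.setProperty(BACKFILL_PHASE_END_TIME.toString(), "1469000000");
        props.setProperty(BACKFILL_PHASE_TABLE_NAME.toString(), "myTable_OFFLINE");
        props.setProperty(BACKFILL_PHASE_OUTPUT_PATH.toString(), "/user/thirdeye/backfill");

        // same effect as running main() with a config.properties holding these keys
        new BackfillPhaseJob("backfill_job", props).run();
      }
    }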
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java
deleted file mode 100644
index 878a727541..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.backfill;
-
-import com.linkedin.pinot.core.data.GenericRow;
-import com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-import java.io.File;
-import java.io.IOException;
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseConstants.*;
-
-/**
- * Mapper class for Backfill job, which converts a pinot segment to avro files
- */
-public class BackfillPhaseMapJob {
-
-  public static class BackfillMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
-    private static Logger LOGGER = LoggerFactory.getLogger(BackfillPhaseMapJob.class);
-
-    private Configuration properties;
-
-    private String inputPath;
-    private String outputPath;
-    private String currentDiskWorkDir;
-    private FileSystem fs;
-
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-
-      currentDiskWorkDir = "pinot_hadoop_tmp";
-      new File(currentDiskWorkDir).mkdir();
-
-      LOGGER.info("*********************************************************************");
-      LOGGER.info("Configurations : {}", context.getConfiguration().toString());
-      LOGGER.info("Current DISK working dir : {}", new File(currentDiskWorkDir).getAbsolutePath());
-      LOGGER.info("*********************************************************************");
-
-      properties = context.getConfiguration();
-      fs = FileSystem.get(new Configuration());
-
-      outputPath = properties.get(BACKFILL_PHASE_OUTPUT_PATH.toString());
-    }
-
-    @Override
-    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-
-      String line = value.toString();
-      String[] lineSplits = line.split(" ");
-
-      LOGGER.info("*********************************************************************");
-      LOGGER.info("mapper input : {}", value);
-      LOGGER.info("Path to output : {}", outputPath);
-      LOGGER.info("num lines : {}", lineSplits.length);
-
-      for (String split : lineSplits) {
-        LOGGER.info("Command line : {}", split);
-      }
-      if (lineSplits.length != 3) {
-        throw new RuntimeException("Input to the mapper is malformed");
-      }
-      inputPath = lineSplits[1].trim();
-
-      LOGGER.info("input data file path : {}", inputPath);
-      LOGGER.info("*********************************************************************");
-
-      try {
-        createAvro(inputPath);
-        LOGGER.info("Finished avro creation job successfully");
-      } catch (Exception e) {
-        LOGGER.error("Got exceptions during creating avro!", e);
-      }
-      LOGGER.info("Finished the job successfully!");
-    }
-
-    private void createAvro(String dataFilePath) throws Exception {
-
-      Path hdfsDataPath = new Path(dataFilePath);
-      File dataPath = new File(currentDiskWorkDir, "data");
-      if (dataPath.exists()) {
-        dataPath.delete();
-      }
-      dataPath.mkdir();
-      LOGGER.info("Creating temporary data dir {}", dataPath);
-
-      final File avroPath = new File(currentDiskWorkDir, "avro");
-      if (avroPath.exists()) {
-        avroPath.delete();
-      }
-      avroPath.mkdir();
-      LOGGER.info("Creating temporary avro dir {}", avroPath);
-
-      String segmentName = hdfsDataPath.getName();
-      final Path localFilePath = new Path(dataPath + "/" + segmentName);
-      fs.copyToLocalFile(hdfsDataPath, localFilePath);
-      LOGGER.info("Copying segment {} from {} to local {}", segmentName, hdfsDataPath, localFilePath);
-      File segmentIndexDir = new File(localFilePath.toString());
-      if (!segmentIndexDir.exists()) {
-        throw new IllegalStateException("Failed to copy " + hdfsDataPath + " to " + localFilePath);
-      }
-
-      LOGGER.info("Initializing PinotSegmentRecordReader with segment index dir {}", segmentIndexDir);
-      PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(segmentIndexDir);
-      LOGGER.info("Schema {}", pinotSegmentRecordReader.getSchema());
-
-      Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSegmentRecordReader.getSchema());
-      GenericDatumWriter<GenericRecord> datum = new GenericDatumWriter<GenericRecord>(avroSchema);
-      DataFileWriter<GenericRecord> recordWriter = new DataFileWriter<GenericRecord>(datum);
-      File localAvroFile = new File(avroPath, segmentName + ThirdEyeConstants.AVRO_SUFFIX);
-      recordWriter.create(avroSchema, localAvroFile);
-
-      LOGGER.info("Converting pinot segment to avro at {}", localAvroFile);
-      while (pinotSegmentRecordReader.hasNext()) {
-        GenericRecord outputRecord = new Record(avroSchema);
-        GenericRow row = pinotSegmentRecordReader.next();
-        for (String fieldName : row.getFieldNames()) {
-          outputRecord.put(fieldName, row.getValue(fieldName));
-        }
-        recordWriter.append(outputRecord);
-      }
-      LOGGER.info("Writing to avro file at {}", localAvroFile);
-      recordWriter.close();
-      if (!localAvroFile.exists()) {
-        LOGGER.info("Failed to write avro file to {}", localAvroFile);
-      }
-      pinotSegmentRecordReader.close();
-
-      LOGGER.info("Coping avro file from {} to hdfs at {}", localAvroFile, outputPath);
-      fs.copyFromLocalFile(true, true, new Path(localAvroFile.toString()), new Path(outputPath));
-      if (!fs.exists(new Path(outputPath))) {
-        throw new IllegalStateException("Failed to copy avro file to hdfs at " + outputPath );
-      }
-      LOGGER.info("Successfully copied {} to {}", localAvroFile, outputPath);
-    }
-  }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionSpec.java
deleted file mode 100644
index 5834765d82..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionSpec.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.Objects;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-/**
- * Class for representing dimension specs such as name and dimension type
- * @param name - dimension name
- */
-public class DimensionSpec {
-  private String name;
-  private DimensionType dimensionType;
-
-  public DimensionSpec() {
-  }
-
-
-  public DimensionSpec(String name, DimensionType dimensionType) {
-    this.name = name;
-    this.dimensionType = dimensionType;
-  }
-
-  @JsonProperty
-  public String getName() {
-    return name;
-  }
-
-  @JsonProperty
-  public DimensionType getDimensionType() {
-    return dimensionType;
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (!(o instanceof DimensionSpec)) {
-      return false;
-    }
-    DimensionSpec d = (DimensionSpec) o;
-
-    return Objects.equals(d.getName(), name) && Objects.equals(d.getDimensionType(), dimensionType);
-  }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionType.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionType.java
deleted file mode 100644
index 0efaa0d145..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/DimensionType.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-
-/**
- * Represents the various data types supported for a dimension<br/>
- * Currently we support INT, SHORT, LONG, FLOAT, DOUBLE, STRING
- */
-public enum DimensionType {
-  INT {
-    @Override
-    public Object getValueFromString(String strVal) {
-      return Integer.valueOf(strVal);
-    }
-
-    @Override
-    public Object getDefaultNullvalue() {
-      return ThirdEyeConstants.EMPTY_INT;
-    }
-
-    @Override
-    public Object getDefaultOtherValue() {
-      return ThirdEyeConstants.EMPTY_INT;
-    }
-  },
-  SHORT {
-    @Override
-    public Object getValueFromString(String strVal) {
-      return Short.valueOf(strVal);
-    }
-
-    @Override
-    public Object getDefaultNullvalue() {
-      return ThirdEyeConstants.EMPTY_SHORT;
-    }
-
-    @Override
-    public Object getDefaultOtherValue() {
-      return ThirdEyeConstants.EMPTY_SHORT;
-    }
-  },
-  LONG {
-    @Override
-    public Object getValueFromString(String strVal) {
-      return Long.valueOf(strVal);
-    }
-
-    @Override
-    public Object getDefaultNullvalue() {
-      return ThirdEyeConstants.EMPTY_LONG;
-    }
-
-    @Override
-    public Object getDefaultOtherValue() {
-      return ThirdEyeConstants.EMPTY_LONG;
-    }
-  },
-  FLOAT {
-    @Override
-    public Object getValueFromString(String strVal) {
-      return Float.valueOf(strVal);
-    }
-
-    @Override
-    public Object getDefaultNullvalue() {
-      return ThirdEyeConstants.EMPTY_FLOAT;
-    }
-
-    @Override
-    public Object getDefaultOtherValue() {
-      return ThirdEyeConstants.EMPTY_FLOAT;
-    }
-  },
-  DOUBLE {
-    @Override
-    public Object getValueFromString(String strVal) {
-      return Double.valueOf(strVal);
-    }
-
-    @Override
-    public Object getDefaultNullvalue() {
-      return ThirdEyeConstants.EMPTY_DOUBLE;
-    }
-
-    @Override
-    public Object getDefaultOtherValue() {
-      return ThirdEyeConstants.EMPTY_DOUBLE;
-    }
-  },
-  STRING {
-    @Override
-    public Object getValueFromString(String strVal) {
-      return strVal;
-    }
-
-    @Override
-    public Object getDefaultNullvalue() {
-      return ThirdEyeConstants.EMPTY_STRING;
-    }
-
-    @Override
-    public Object getDefaultOtherValue() {
-      return ThirdEyeConstants.OTHER;
-    }
-  };
-
-
-  public abstract Object getValueFromString(String strVal);
-
-  public abstract Object getDefaultNullvalue();
-
-  public abstract Object getDefaultOtherValue();
-
-
-  /**
-   * Writes the dimension value to a data output stream
-   * @param dos DataOutputStream
-   * @param dimensionValue
-   * @param dimensionType
-   * @throws IOException
-   */
-  public static void writeDimensionValueToOutputStream(DataOutputStream dos, Object dimensionValue,
-      DimensionType dimensionType) throws IOException {
-    switch (dimensionType) {
-    case DOUBLE:
-      dos.writeDouble((double) dimensionValue);
-      break;
-    case FLOAT:
-      dos.writeFloat((float) dimensionValue);
-      break;
-    case INT:
-      dos.writeInt((int) dimensionValue);
-      break;
-    case LONG:
-      dos.writeLong((long) dimensionValue);
-      break;
-    case SHORT:
-      dos.writeShort((short) dimensionValue);
-      break;
-    case STRING:
-      String stringVal = (String) dimensionValue;
-      byte[] bytes = stringVal.getBytes();
-      dos.writeInt(bytes.length);
-      dos.write(bytes);
-      break;
-    default:
-      throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
-    }
-  }
-
-  /**
-   * Reads the dimension value from a given data input stream
-   * @param dis DataInputStream
-   * @param dimensionType
-   * @return
-   * @throws IOException
-   */
-  public static Object readDimensionValueFromDataInputStream(DataInputStream dis, DimensionType dimensionType) throws IOException {
-    Object dimensionValue = null;
-    switch (dimensionType) {
-    case DOUBLE:
-      dimensionValue = dis.readDouble();
-      break;
-    case FLOAT:
-      dimensionValue = dis.readFloat();
-      break;
-    case INT:
-      dimensionValue = dis.readInt();
-      break;
-    case SHORT:
-      dimensionValue = dis.readShort();
-      break;
-    case LONG:
-      dimensionValue = dis.readLong();
-      break;
-    case STRING:
-      int length = dis.readInt();
-      byte[] bytes = new byte[length];
-      dis.readFully(bytes); // read() may return fewer bytes than requested
-      dimensionValue = new String(bytes);
-      break;
-    default:
-      throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
-    }
-    return dimensionValue;
-  }
-
-}
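A short sketch of the stream round trip above for a STRING dimension, the one case with a length-prefixed encoding. The value "US" and the class name are arbitrary; only the static helpers shown in the diff are used:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import com.linkedin.thirdeye.hadoop.config.DimensionType;

    public class DimensionTypeRoundTripExample {
      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);

        // STRING writes the byte length first, then the raw bytes
        DimensionType.writeDimensionValueToOutputStream(dos, "US", DimensionType.STRING);
        dos.flush();

        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
        Object value = DimensionType.readDimensionValueFromDataInputStream(dis, DimensionType.STRING);
        System.out.println(value); // prints US
      }
    }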
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricSpec.java
deleted file mode 100644
index 340048be6a..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricSpec.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-/**
- * Class for representing metric specs
- * @param name - metric name
- * @param type - metric type
- */
-public class MetricSpec {
-  private String name;
-  private MetricType type;
-
-  public MetricSpec() {
-  }
-
-  public MetricSpec(String name, MetricType type) {
-    this.name = name;
-    this.type = type;
-  }
-
-  @JsonProperty
-  public String getName() {
-    return name;
-  }
-
-  @JsonProperty
-  public MetricType getType() {
-    return type;
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (!(o instanceof MetricSpec)) {
-      return false;
-    }
-
-    MetricSpec m = (MetricSpec) o;
-
-    return name.equals(m.getName()) && type.equals(m.getType());
-  }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricType.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricType.java
deleted file mode 100644
index bb4c0ffe50..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/MetricType.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-
-/**
- * Represents the various data types supported for a metric<br/>
- * Currently we support INT, SHORT, LONG, FLOAT, DOUBLE
- */
-public enum MetricType {
-
-  INT {
-    public Number toNumber(String s) {
-      return Integer.parseInt(s);
-    }
-
-    public int byteSize() {
-      return 4;
-    }
-
-    @Override
-    public Number getDefaultNullValue() {
-      return ThirdEyeConstants.EMPTY_INT;
-    }
-
-  },
-  SHORT {
-    public Number toNumber(String s) {
-      return Short.parseShort(s);
-    }
-
-    public int byteSize() {
-      return 2;
-
-    }
-
-    @Override
-    public Number getDefaultNullValue() {
-      return ThirdEyeConstants.EMPTY_SHORT;
-    }
-
-  },
-  LONG {
-    public Number toNumber(String s) {
-      return Long.parseLong(s);
-    }
-
-    public int byteSize() {
-      return 8;
-
-    }
-
-    @Override
-    public Number getDefaultNullValue() {
-      return ThirdEyeConstants.EMPTY_LONG;
-    }
-
-  },
-  FLOAT {
-    public Number toNumber(String s) {
-      return Float.parseFloat(s);
-    }
-
-    public int byteSize() {
-      return 4;
-
-    }
-
-    @Override
-    public Number getDefaultNullValue() {
-      return ThirdEyeConstants.EMPTY_FLOAT;
-    }
-
-  },
-  DOUBLE {
-    public Number toNumber(String s) {
-      return Double.parseDouble(s);
-    }
-
-    public int byteSize() {
-      return 8;
-    }
-
-    @Override
-    public Number getDefaultNullValue() {
-      return ThirdEyeConstants.EMPTY_DOUBLE;
-    }
-  };
-
-  public Number toNumber(String s) {
-    throw new AbstractMethodError();
-  }
-
-  public int byteSize() {
-    throw new AbstractMethodError();
-  }
-
-  public abstract Number getDefaultNullValue();
-
-  /**
-   * Writes a metric value to a data output stream
-   * @param dos
-   * @param number
-   * @param metricType
-   * @throws IOException
-   */
-  public static void writeMetricValueToDataOutputStream(DataOutputStream dos, Number number, MetricType metricType) throws IOException {
-    switch (metricType) {
-    case SHORT:
-      dos.writeShort(number.intValue());
-      break;
-    case LONG:
-      dos.writeLong(number.longValue());
-      break;
-    case INT:
-      dos.writeInt(number.intValue());
-      break;
-    case FLOAT:
-      dos.writeFloat(number.floatValue());
-      break;
-    case DOUBLE:
-      dos.writeDouble(number.doubleValue());
-      break;
-    default:
-      throw new IllegalArgumentException("Unsupported metricType " + metricType);
-    }
-  }
-
-  /**
-   * Reads a metric value from a data input stream
-   * @param dis
-   * @param metricType
-   * @return
-   * @throws IOException
-   */
-  public static Number readMetricValueFromDataInputStream(DataInputStream dis, MetricType metricType) throws IOException {
-    Number metricValue = null;
-    switch (metricType) {
-    case SHORT:
-      metricValue = dis.readShort();
-      break;
-    case LONG:
-      metricValue = dis.readLong();
-      break;
-    case INT:
-      metricValue = dis.readInt();
-      break;
-    case FLOAT:
-      metricValue = dis.readFloat();
-      break;
-    case DOUBLE:
-      metricValue = dis.readDouble();
-      break;
-    default:
-      throw new IllegalArgumentException("Unsupported metricType " + metricType);
-    }
-    return metricValue;
-  }
-}
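A few illustrative calls against the per-type helpers above; the literals are arbitrary and the wrapper class is hypothetical:

    import com.linkedin.thirdeye.hadoop.config.MetricType;

    public class MetricTypeExample {
      public static void main(String[] args) {
        System.out.println(MetricType.DOUBLE.toNumber("0.25"));     // parses to a Double
        System.out.println(MetricType.INT.byteSize());              // 4 bytes on the wire
        System.out.println(MetricType.LONG.getDefaultNullValue());  // ThirdEyeConstants.EMPTY_LONG
      }
    }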
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/SplitSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/SplitSpec.java
deleted file mode 100644
index a2c353e088..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/SplitSpec.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-import java.util.List;
-
-/**
- * Class for representing split spec
- * @param threshold - threshold after which to stop splitting on a node in star tree
- * @param order - order in which dimensions should be chosen to split in star tree creation
- */
-public class SplitSpec {
-  private int threshold = 1000;
-  private List<String> order;
-
-  public SplitSpec() {
-  }
-
-  public SplitSpec(int threshold, List<String> order) {
-    this.threshold = threshold;
-    this.order = order;
-  }
-
-  @JsonProperty
-  public int getThreshold() {
-    return threshold;
-  }
-
-  @JsonProperty
-  public List<String> getOrder() {
-    return order;
-  }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfig.java
deleted file mode 100644
index 7f5f383b8e..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfig.java
+++ /dev/null
@@ -1,479 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.commons.lang.StringUtils;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
-import com.google.common.collect.Lists;
-import com.linkedin.pinot.common.data.TimeGranularitySpec.TimeFormat;
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.SplitSpec;
-import com.linkedin.thirdeye.hadoop.config.TimeGranularity;
-import com.linkedin.thirdeye.hadoop.config.TimeSpec;
-import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
-import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;
-
-/**
- * This class represents the configs required by the thirdeye-hadoop jobs
- * @param collection - name of the pinot table
- * @param dimensions - list of dimensionSpecs for dimensions
- * @param metrics - list of metricSpecs for metrics
- * @param time - time spec
- * @param topKWhitelist - metric threshold, topk and whitelist spec
- * @param split - split spec
- */
-public final class ThirdEyeConfig {
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory());
-  private static final String FIELD_SEPARATOR = ",";
-  private static final String CONFIG_JOINER = ".";
-  private static final String DEFAULT_TIME_TYPE = "HOURS";
-  private static final String DEFAULT_TIME_SIZE = "1";
-  private static final String DEFAULT_TIME_FORMAT = TimeFormat.EPOCH.toString();
-
-  private String collection;
-  private List<DimensionSpec> dimensions;
-  private List<MetricSpec> metrics;
-  private TimeSpec inputTime = new TimeSpec();
-  private TimeSpec time = new TimeSpec();
-  private TopkWhitelistSpec topKWhitelist = new TopkWhitelistSpec();
-  private SplitSpec split = new SplitSpec();
-
-  public ThirdEyeConfig() {
-  }
-
-  public ThirdEyeConfig(String collection, List<DimensionSpec> dimensions,
-      List<MetricSpec> metrics, TimeSpec inputTime, TimeSpec time, TopkWhitelistSpec topKWhitelist, SplitSpec split) {
-    this.collection = collection;
-    this.dimensions = dimensions;
-    this.metrics = metrics;
-    this.inputTime = inputTime;
-    this.time = time;
-    this.topKWhitelist = topKWhitelist;
-    this.split = split;
-  }
-
-  public String getCollection() {
-    return collection;
-  }
-
-  public List<DimensionSpec> getDimensions() {
-    return dimensions;
-  }
-
-  @JsonIgnore
-  public List<String> getDimensionNames() {
-    List<String> results = new ArrayList<>(dimensions.size());
-    for (DimensionSpec dimensionSpec : dimensions) {
-      results.add(dimensionSpec.getName());
-    }
-    return results;
-  }
-
-  public List<MetricSpec> getMetrics() {
-    return metrics;
-  }
-
-  @JsonIgnore
-  public List<String> getMetricNames() {
-    List<String> results = new ArrayList<>(metrics.size());
-    for (MetricSpec metricSpec : metrics) {
-      results.add(metricSpec.getName());
-    }
-    return results;
-  }
-
-  public TimeSpec getInputTime() {
-    return inputTime;
-  }
-
-  public TimeSpec getTime() {
-    return time;
-  }
-
-  public TopkWhitelistSpec getTopKWhitelist() {
-    return topKWhitelist;
-  }
-
-  /**
-   * Returns a set of all dimensions which have either topk or whitelist config
-   * @return
-   */
-  @JsonIgnore
-  public Set<String> getTransformDimensions() {
-    Set<String> transformDimensions = new HashSet<>();
-
-    if (topKWhitelist != null) {
-      List<TopKDimensionToMetricsSpec> topk = topKWhitelist.getTopKDimensionToMetricsSpec();
-      if (topk != null) {
-        for (TopKDimensionToMetricsSpec spec : topk) {
-          transformDimensions.add(spec.getDimensionName());
-        }
-      }
-    }
-    return transformDimensions;
-  }
-
-  public SplitSpec getSplit() {
-    return split;
-  }
-
-  public String encode() throws IOException {
-    return OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(this);
-  }
-
-  public static class Builder {
-    private String collection;
-    private List<DimensionSpec> dimensions;
-    private List<MetricSpec> metrics;
-    private TimeSpec inputTime = new TimeSpec();
-    private TimeSpec time = new TimeSpec();
-    private TopkWhitelistSpec topKWhitelist = new TopkWhitelistSpec();
-    private SplitSpec split = new SplitSpec();
-
-    public String getCollection() {
-      return collection;
-    }
-
-    public Builder setCollection(String collection) {
-      this.collection = collection;
-      return this;
-    }
-
-    public List<DimensionSpec> getDimensions() {
-      return dimensions;
-    }
-
-    public Builder setDimensions(List<DimensionSpec> dimensions) {
-      this.dimensions = dimensions;
-      return this;
-    }
-
-    public List<MetricSpec> getMetrics() {
-      return metrics;
-    }
-
-    public Builder setMetrics(List<MetricSpec> metrics) {
-      this.metrics = metrics;
-      return this;
-    }
-
-    public TimeSpec getInputTime() {
-      return inputTime;
-    }
-
-    public TimeSpec getTime() {
-      return time;
-    }
-
-    public Builder setTime(TimeSpec time) {
-      this.time = time;
-      return this;
-    }
-
-    public TopkWhitelistSpec getTopKWhitelist() {
-      return topKWhitelist;
-    }
-
-    public Builder setTopKWhitelist(TopkWhitelistSpec topKWhitelist) {
-      this.topKWhitelist = topKWhitelist;
-      return this;
-    }
-
-    public SplitSpec getSplit() {
-      return split;
-    }
-
-    public Builder setSplit(SplitSpec split) {
-      this.split = split;
-      return this;
-    }
-
-    public ThirdEyeConfig build() throws Exception {
-      if (collection == null) {
-        throw new IllegalArgumentException("Must provide collection");
-      }
-
-      if (dimensions == null || dimensions.isEmpty()) {
-        throw new IllegalArgumentException("Must provide dimension names");
-      }
-
-      if (metrics == null || metrics.isEmpty()) {
-        throw new IllegalArgumentException("Must provide metric specs");
-      }
-
-      return new ThirdEyeConfig(collection, dimensions, metrics, inputTime, time, topKWhitelist, split);
-    }
-  }
-
-  public static ThirdEyeConfig decode(InputStream inputStream) throws IOException {
-    return OBJECT_MAPPER.readValue(inputStream, ThirdEyeConfig.class);
-  }
-
-  /**
-   * Creates a ThirdEyeConfig object from the Properties object
-   * @param props
-   * @return
-   */
-  public static ThirdEyeConfig fromProperties(Properties props) {
-
-    String collection = getCollectionFromProperties(props);
-    List<DimensionSpec> dimensions = getDimensionFromProperties(props);
-    List<MetricSpec> metrics = getMetricsFromProperties(props);
-    TimeSpec inputTime = getInputTimeFromProperties(props);
-    TimeSpec time = getTimeFromProperties(props);
-    SplitSpec split = getSplitFromProperties(props);
-    TopkWhitelistSpec topKWhitelist = getTopKWhitelistFromProperties(props);
-    ThirdEyeConfig thirdeyeConfig = new ThirdEyeConfig(collection, dimensions, metrics, inputTime, time, topKWhitelist, split);
-    return thirdeyeConfig;
-  }
-
-  private static TopkWhitelistSpec getTopKWhitelistFromProperties(Properties props) {
-    TopkWhitelistSpec topKWhitelist = null;
-
-    Map<String, Double> threshold = getThresholdFromProperties(props);
-    List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = getTopKDimensionToMetricsSpecFromProperties(props);
-    Map<String, List<String>> whitelist = getWhitelistFromProperties(props);
-    Map<String, String> nonWhitelistValue = getNonWhitelistValueFromProperties(props);
-
-    if (threshold != null || topKDimensionToMetricsSpec != null || whitelist != null) {
-      topKWhitelist = new TopkWhitelistSpec();
-      topKWhitelist.setThreshold(threshold);
-      topKWhitelist.setTopKDimensionToMetricsSpec(topKDimensionToMetricsSpec);
-      topKWhitelist.setWhitelist(whitelist);
-      topKWhitelist.setNonWhitelistValue(nonWhitelistValue);
-    }
-    return topKWhitelist;
-  }
-
-  /**
-   * Creates a map of dimension name to the value that should be used for "others"
-   * @param props
-   * @return
-   */
-  private static Map<String, String> getNonWhitelistValueFromProperties(Properties props) {
-    Map<String, String> dimensionToNonWhitelistValueMap = null;
-
-    // create dimension to type map
-    List<DimensionSpec> dimensions = getDimensionFromProperties(props);
-    Map<String, DimensionType> dimensionToType = new HashMap<>();
-    for (int i = 0; i < dimensions.size(); i ++) {
-      DimensionSpec spec = dimensions.get(i);
-      dimensionToType.put(spec.getName(), spec.getDimensionType());
-    }
-
-    // dimensions with whitelist
-    String whitelistDimensionsStr = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), null);
-    List<String> whitelistDimensions = new ArrayList<>();
-    if (StringUtils.isNotBlank(whitelistDimensionsStr)) {
-      dimensionToNonWhitelistValueMap = new HashMap<>();
-      whitelistDimensions.addAll(Lists.newArrayList(whitelistDimensionsStr.split(FIELD_SEPARATOR)));
-    }
-
-    for (String whitelistDimension : whitelistDimensions) {
-      String nonWhitelistValue = getAndCheck(props,
-          ThirdEyeConfigProperties.THIRDEYE_NONWHITELIST_VALUE_DIMENSION.toString() + CONFIG_JOINER + whitelistDimension, null);
-      if (StringUtils.isNotBlank(nonWhitelistValue)) {
-        dimensionToNonWhitelistValueMap.put(whitelistDimension, nonWhitelistValue);
-      } else {
-        dimensionToNonWhitelistValueMap.put(whitelistDimension, String.valueOf(dimensionToType.get(whitelistDimension).getDefaultOtherValue()));
-      }
-    }
-    return dimensionToNonWhitelistValueMap;
-  }
-
-
-  private static Map<String, List<String>> getWhitelistFromProperties(Properties props) {
-    // create dimension to type map
-    List<DimensionSpec> dimensions = getDimensionFromProperties(props);
-    Map<String, DimensionType> dimensionToType = new HashMap<>();
-    Map<String, Integer> dimensionToIndex = new HashMap<>();
-    for (int i = 0; i < dimensions.size(); i ++) {
-      DimensionSpec spec = dimensions.get(i);
-      dimensionToType.put(spec.getName(), spec.getDimensionType());
-      dimensionToIndex.put(spec.getName(), i);
-    }
-
-    Map<String, List<String>> whitelist = null;
-    String whitelistDimensionsStr = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), null);
-    if (whitelistDimensionsStr != null && whitelistDimensionsStr.split(FIELD_SEPARATOR).length > 0) {
-      whitelist = new HashMap<>();
-      for (String dimension : whitelistDimensionsStr.split(FIELD_SEPARATOR)) {
-        String whitelistValuesStr = getAndCheck(props,
-            ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + CONFIG_JOINER + dimension);
-        String[] whitelistValues = whitelistValuesStr.split(FIELD_SEPARATOR);
-        List<String> whitelistValuesList = Lists.newArrayList(whitelistValues);
-        whitelist.put(dimension, whitelistValuesList);
-      }
-    }
-    return whitelist;
-  }
-
-  private static List<TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpecFromProperties(Properties props) {
-    List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = null;
-    String topKDimensionNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), null);
-    if (StringUtils.isNotEmpty(topKDimensionNames) && topKDimensionNames.split(FIELD_SEPARATOR).length > 0) {
-      topKDimensionToMetricsSpec = new ArrayList<>();
-      for (String dimension : topKDimensionNames.split(FIELD_SEPARATOR)) {
-        String[] topKDimensionMetrics = getAndCheck(props,
-            ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + CONFIG_JOINER + dimension)
-            .split(FIELD_SEPARATOR);
-        String[] topKDimensionKValues = getAndCheck(props,
-            ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + CONFIG_JOINER + dimension)
-            .split(FIELD_SEPARATOR);
-        if (topKDimensionMetrics.length != topKDimensionKValues.length) {
-          throw new IllegalStateException("Number of topk metric names and kvalues should be same for a dimension");
-        }
-        Map<String, Integer> topk = new HashMap<>();
-        for (int i = 0; i < topKDimensionMetrics.length; i++) {
-          topk.put(topKDimensionMetrics[i], Integer.parseInt(topKDimensionKValues[i]));
-        }
-        topKDimensionToMetricsSpec.add(new TopKDimensionToMetricsSpec(dimension, topk));
-      }
-    }
-    return topKDimensionToMetricsSpec;
-  }
-
-  private static Map<String, Double> getThresholdFromProperties(Properties props) {
-    Map<String, Double> threshold = null;
-    String thresholdMetricNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString(), null);
-    String metricThresholdValues = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), null);
-    if (thresholdMetricNames != null && metricThresholdValues != null) {
-      String[] thresholdMetrics = thresholdMetricNames.split(FIELD_SEPARATOR);
-      String[] thresholdValues = metricThresholdValues.split(FIELD_SEPARATOR);
-      if (thresholdMetrics.length != thresholdValues.length) {
-        throw new IllegalStateException("Number of threshold metric names should be same as threshold values");
-      }
-      threshold = new HashMap<>();
-      for (int i = 0; i < thresholdMetrics.length; i++) {
-        threshold.put(thresholdMetrics[i], Double.parseDouble(thresholdValues[i]));
-      }
-    }
-    return threshold;
-  }
-
-  private static SplitSpec getSplitFromProperties(Properties props) {
-    SplitSpec split = null;
-    String splitThreshold = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_SPLIT_THRESHOLD.toString(), null);
-    if (splitThreshold != null) {
-      String splitOrder = getAndCheck(props,
-          ThirdEyeConfigProperties.THIRDEYE_SPLIT_ORDER.toString(), null);
-      List<String> splitOrderList = null;
-      if (splitOrder != null) {
-        splitOrderList = Arrays.asList(splitOrder.split(FIELD_SEPARATOR));
-      }
-      split = new SplitSpec(Integer.parseInt(splitThreshold), splitOrderList);
-    }
-    return split;
-  }
-
-  private static TimeSpec getTimeFromProperties(Properties props) {
-    String timeColumnName = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString());
-    String timeColumnType = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_TYPE.toString(), DEFAULT_TIME_TYPE);
-    String timeColumnSize = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_SIZE.toString(), DEFAULT_TIME_SIZE);
-    TimeGranularity timeGranularity = new TimeGranularity(Integer.parseInt(timeColumnSize), TimeUnit.valueOf(timeColumnType));
-    String timeFormat = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_FORMAT.toString(), DEFAULT_TIME_FORMAT);
-    TimeSpec time = new TimeSpec(timeColumnName, timeGranularity, timeFormat);
-    return time;
-  }
-
-
-  private static TimeSpec getInputTimeFromProperties(Properties props) {
-    TimeSpec inputTime = null;
-    String timeColumnName = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString());
-    String timeColumnType = getAndCheck(props,
-          ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_TYPE.toString(), null);
-    String timeColumnSize = getAndCheck(props,
-          ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_SIZE.toString(), null);
-    String timeFormat = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_FORMAT.toString(), DEFAULT_TIME_FORMAT);
-    if (timeColumnType != null && timeColumnSize != null) {
-      TimeGranularity timeGranularity = new TimeGranularity(Integer.parseInt(timeColumnSize), TimeUnit.valueOf(timeColumnType));
-      inputTime = new TimeSpec(timeColumnName, timeGranularity, timeFormat);
-    }
-    return inputTime;
-  }
-
-  private static List<MetricSpec> getMetricsFromProperties(Properties props) {
-    List<MetricSpec> metrics = new ArrayList<>();
-    String[] metricNames = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()).split(FIELD_SEPARATOR);
-    String[] metricTypes = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()).split(FIELD_SEPARATOR);
-    if (metricNames.length != metricTypes.length) {
-      throw new IllegalStateException("Number of metric names provided "
-          + "should be same as number of metric types");
-    }
-    for (int i = 0; i < metricNames.length; i++) {
-      metrics.add(new MetricSpec(metricNames[i], MetricType.valueOf(metricTypes[i])));
-    }
-    return metrics;
-  }
-
-  private static List<DimensionSpec> getDimensionFromProperties(Properties props) {
-    List<DimensionSpec> dimensions = new ArrayList<>();
-    String[] dimensionNames = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()).split(FIELD_SEPARATOR);
-    String[] dimensionTypes = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString()).split(FIELD_SEPARATOR);
-    for (int i = 0; i < dimensionNames.length; i++) {
-      dimensions.add(new DimensionSpec(dimensionNames[i], DimensionType.valueOf(dimensionTypes[i])));
-    }
-    return dimensions;
-  }
-
-  private static String getCollectionFromProperties(Properties props) {
-    String collection = getAndCheck(props,
-        ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString());
-    return collection;
-  }
-
-  private static String getAndCheck(Properties props, String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " required property");
-    }
-    return propValue;
-  }
-
-  private static String getAndCheck(Properties props, String propName, String defaultValue) {
-    String propValue = props.getProperty(propName, defaultValue);
-    return propValue;
-  }
-
-}
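
For context, a minimal sketch of the flat Properties that the parsing helpers above consume; the key names come from the ThirdEyeConfigProperties enum below, while the table, dimension, metric and time column values are purely illustrative (package names as in the files in this diff):

    import java.util.Properties;

    import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;

    public class ThirdEyeConfigSketch {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // required table, dimension, metric and time settings (values are illustrative)
        props.setProperty("thirdeye.table.name", "pageViews");
        props.setProperty("thirdeye.dimension.names", "country,browser");
        props.setProperty("thirdeye.dimension.types", "STRING,STRING");
        props.setProperty("thirdeye.metric.names", "views");
        props.setProperty("thirdeye.metric.types", "LONG");
        props.setProperty("thirdeye.timecolumn.name", "hoursSinceEpoch");
        // optional topk config: keep the top 10 browser values ranked by the views metric
        props.setProperty("thirdeye.topk.dimension.names", "browser");
        props.setProperty("thirdeye.topk.metrics.browser", "views");
        props.setProperty("thirdeye.topk.kvalues.browser", "10");
        // optional whitelist config for the country dimension
        props.setProperty("thirdeye.whitelist.dimension.names", "country");
        props.setProperty("thirdeye.whitelist.dimension.country", "us,de");

        ThirdEyeConfig config = ThirdEyeConfig.fromProperties(props);
        System.out.println(config.getTopKWhitelist().getTopKDimensionToMetricsSpec());
      }
    }
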
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigProperties.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigProperties.java
deleted file mode 100644
index 18be146baa..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigProperties.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-/**
- * Class for representing all property names used in thirdeye-hadoop jobs
- */
-public enum ThirdEyeConfigProperties {
-
-  /** Pinot table name */
-  THIRDEYE_TABLE_NAME("thirdeye.table.name"),
-
-  /** Comma Separated dimension names */
-  THIRDEYE_DIMENSION_NAMES("thirdeye.dimension.names"),
-
-  /** Comma Separated dimension types */
-  THIRDEYE_DIMENSION_TYPES("thirdeye.dimension.types"),
-
-  /** Comma separated metric names */
-  THIRDEYE_METRIC_NAMES("thirdeye.metric.names"),
-
-  /** Comma separated metric types */
-  THIRDEYE_METRIC_TYPES("thirdeye.metric.types"),
-
-  /** Time column name */
-  THIRDEYE_TIMECOLUMN_NAME("thirdeye.timecolumn.name"),
-
-  /** Time input column type before aggregation (HOURS, DAYS etc) */
-  THIRDEYE_INPUT_TIMECOLUMN_TYPE("thirdeye.input.timecolumn.type"),
-
-  /** Time input bucket size before aggregation*/
-  THIRDEYE_INPUT_TIMECOLUMN_SIZE("thirdeye.input.timecolumn.size"),
-
-  /** Time format
-   * Can be either EPOCH (default) or SIMPLE_DATE_FORMAT:pattern e.g SIMPLE_DATE_FORMAT:yyyyMMdd  */
-  THIRDEYE_INPUT_TIMECOLUMN_FORMAT("thirdeye.input.timecolumn.format"),
-
-  /** Time column type (HOURS, DAYS etc) */
-  THIRDEYE_TIMECOLUMN_TYPE("thirdeye.timecolumn.type"),
-
-  /** Time bucket size */
-  THIRDEYE_TIMECOLUMN_SIZE("thirdeye.timecolumn.size"),
-
-  /** Time format
-   * Can be either EPOCH (default) or SIMPLE_DATE_FORMAT:pattern e.g SIMPLE_DATE_FORMAT:yyyyMMdd  */
-  THIRDEYE_TIMECOLUMN_FORMAT("thirdeye.timecolumn.format"),
-
-  /** Split threshold for star tree */
-  THIRDEYE_SPLIT_THRESHOLD("thirdeye.split.threshold"),
-
-  /** Split order for star tree */
-  THIRDEYE_SPLIT_ORDER("thirdeye.split.order"),
-
-  /** Comma separated metric names for threshold filtering */
-  THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES("thirdeye.topk.threshold.metric.names"),
-
-  /** Comma separated metric threshold values */
-  THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES("thirdeye.topk.metric.threshold.values"),
-
-  /** Comma separated dimension names for topk config */
-  THIRDEYE_TOPK_DIMENSION_NAMES("thirdeye.topk.dimension.names"),
-
-  /** Used by appending the dimension name at the end, e.g. thirdeye.topk.metrics.d1
-   * Comma separated metrics with topk specification for given dimension */
-  THIRDEYE_TOPK_METRICS("thirdeye.topk.metrics"),
-
-  /** Used by appending the dimension name at the end, e.g. thirdeye.topk.kvalues.d1
-   * Comma separated top k values for corresponding metrics for given dimension */
-  THIRDEYE_TOPK_KVALUES("thirdeye.topk.kvalues"),
-
-  /** Comma separated dimension names which have whitelist */
-  THIRDEYE_WHITELIST_DIMENSION_NAMES("thirdeye.whitelist.dimension.names"),
-
-  /** Used by appending the dimension name at the end, e.g. thirdeye.whitelist.dimension.d1
-   * Comma separated list of values to whitelist for given dimension */
-  THIRDEYE_WHITELIST_DIMENSION("thirdeye.whitelist.dimension"),
-
-  /** Used by appending the dimension name at the end, e.g. thirdeye.nonwhitelist.value.dimension.d1
-   * Value to be used for values which don't belong to whitelist */
-  THIRDEYE_NONWHITELIST_VALUE_DIMENSION("thirdeye.nonwhitelist.value.dimension");
-
-  String name;
-
-  ThirdEyeConfigProperties(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
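
The per-dimension keys above are resolved by appending the dimension name to the base property, as in this small sketch (the dimension name d1 and the "." joiner follow the Javadoc examples):

    import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;

    public class PerDimensionKeySketch {
      public static void main(String[] args) {
        String dimension = "d1";
        // joining with "." matches the thirdeye.topk.metrics.d1 example in the Javadoc above
        String topkMetricsKey =
            ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + "." + dimension;
        System.out.println(topkMetricsKey); // thirdeye.topk.metrics.d1
      }
    }
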
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConstants.java
deleted file mode 100644
index b088835737..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConstants.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.config;
-
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-
-public final class ThirdEyeConstants {
-  public static final String TOPK_VALUES_FILE = "topk_values";
-  public static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormat.forPattern("YYYY-MM-dd-HHmmss");
-  public static final String TOPK_DIMENSION_SUFFIX = "_topk";
-  public static final String OTHER = "other";
-  public static final String EMPTY_STRING = "";
-  public static final Number EMPTY_NUMBER = 0;
-  public static final Double EMPTY_DOUBLE = 0d;
-  public static final Float EMPTY_FLOAT = 0f;
-  public static final Integer EMPTY_INT = 0;
-  public static final Long EMPTY_LONG = 0l;
-  public static final Short EMPTY_SHORT = 0;
-  public static final String SEGMENT_JOINER = "_";
-  public static final String AUTO_METRIC_COUNT = "__COUNT";
-  public static final String FIELD_SEPARATOR = ",";
-  public static final String TAR_SUFFIX = ".tar.gz";
-  public static final String AVRO_SUFFIX = ".avro";
-  public static final String SDF_SEPARATOR = ":";
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeGranularity.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeGranularity.java
deleted file mode 100644
index c10e056fb3..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeGranularity.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.Objects;
-import java.util.concurrent.TimeUnit;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-
-/**
- * TimeGranularity class contains time unit and time size of the star tree time config
- *
- *  unit: the TimeUnit of the column
- *  size: the bucket size of the time column
- */
-public class TimeGranularity {
-  private static final int DEFAULT_TIME_SIZE = 1;
-
-  private int size = DEFAULT_TIME_SIZE;
-  private TimeUnit unit;
-
-  public TimeGranularity() {
-  }
-
-  public TimeGranularity(int size, TimeUnit unit) {
-    this.size = size;
-    this.unit = unit;
-  }
-
-  @JsonProperty
-  public int getSize() {
-    return size;
-  }
-
-  @JsonProperty
-  public TimeUnit getUnit() {
-    return unit;
-  }
-
-  public long toMillis() {
-    return toMillis(1);
-  }
-
-  /**
-   * Converts time in bucketed unit to millis
-   *
-   * @param time
-   * @return
-   */
-  public long toMillis(long time) {
-    return unit.toMillis(time * size);
-  }
-
-  /**
-   * Converts millis to time unit
-   *
-   * e.g. If TimeGranularity is defined as 1 HOURS,
-   * and we invoke convertToUnit(1458284400000) (i.e. 2016-03-18 00:00:00)
-   * this method will return HOURS.convert(1458284400000, MILLISECONDS)/1 = 405079 hoursSinceEpoch
-   *
-   * If TimeGranularity is defined as 10 MINUTES,
-   * and we invoke convertToUnit(1458284400000) (i.e. 2016-03-18 00:00:00)
-   * this method will return MINUTES.convert(1458284400000, MILLISECONDS)/10 = 2430474 tenMinutesSinceEpoch
-   * @param millis
-   * @return
-   */
-  public long convertToUnit(long millis) {
-    return unit.convert(millis, TimeUnit.MILLISECONDS) / size;
-  }
-
-  @Override
-  public String toString() {
-    return size + "-" + unit;
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(size, unit);
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (!(obj instanceof TimeGranularity)) {
-      return false;
-    }
-    TimeGranularity other = (TimeGranularity) obj;
-    return Objects.equals(other.size, this.size) && Objects.equals(other.unit, this.unit);
-  }
-}
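
A short sketch exercising the conversions documented in the Javadoc above, reusing its 2016-03-18 00:00:00 example value:

    import java.util.concurrent.TimeUnit;

    import com.linkedin.thirdeye.hadoop.config.TimeGranularity;

    public class TimeGranularitySketch {
      public static void main(String[] args) {
        long millis = 1458284400000L; // the 2016-03-18 00:00:00 example from the Javadoc

        TimeGranularity oneHour = new TimeGranularity(1, TimeUnit.HOURS);
        System.out.println(oneHour.convertToUnit(millis)); // 405079 hoursSinceEpoch
        System.out.println(oneHour.toMillis(405079L));     // 1458284400000 again

        TimeGranularity tenMinutes = new TimeGranularity(10, TimeUnit.MINUTES);
        System.out.println(tenMinutes.convertToUnit(millis)); // 2430474 tenMinutesSinceEpoch
      }
    }
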
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeSpec.java
deleted file mode 100644
index 60a254e4d9..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TimeSpec.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.linkedin.pinot.common.data.TimeGranularitySpec.TimeFormat;
-
-import java.util.concurrent.TimeUnit;
-
-/** This class represents the time spec for thirdeye-hadoop jobs
- * @param columnName - columnName which represents time
- * @param timeGranularity - time granularity for the time column
- */
-public class TimeSpec {
-  private static final TimeGranularity DEFAULT_TIME_GRANULARITY = new TimeGranularity(1, TimeUnit.HOURS);
-  private static final String DEFAULT_TIME_FORMAT = TimeFormat.EPOCH.toString();
-
-  private String columnName;
-  private TimeGranularity timeGranularity = DEFAULT_TIME_GRANULARITY;
-  private String timeFormat = DEFAULT_TIME_FORMAT;
-
-  public TimeSpec() {
-  }
-
-  public TimeSpec(String columnName, TimeGranularity timeGranularity, String timeFormat) {
-    this.columnName = columnName;
-    this.timeGranularity = timeGranularity;
-    this.timeFormat = timeFormat;
-  }
-
-  @JsonProperty
-  public String getColumnName() {
-    return columnName;
-  }
-
-  @JsonProperty
-  public TimeGranularity getTimeGranularity() {
-    return timeGranularity;
-  }
-
-  @JsonProperty
-  public String getTimeFormat() {
-    return timeFormat;
-  }
-
-
-}
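
A minimal sketch of constructing a TimeSpec with the one-hour granularity and EPOCH format that the defaults above describe (the column name is illustrative):

    import java.util.concurrent.TimeUnit;

    import com.linkedin.thirdeye.hadoop.config.TimeGranularity;
    import com.linkedin.thirdeye.hadoop.config.TimeSpec;

    public class TimeSpecSketch {
      public static void main(String[] args) {
        TimeSpec time =
            new TimeSpec("hoursSinceEpoch", new TimeGranularity(1, TimeUnit.HOURS), "EPOCH");
        System.out.println(time.getColumnName() + " " + time.getTimeGranularity() + " " + time.getTimeFormat());
        // hoursSinceEpoch 1-HOURS EPOCH
      }
    }
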
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java
deleted file mode 100644
index 55d74257bd..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.Map;
-
-/**
- * This class manages config for dimension with topk
- * config defined on multiple metrics
- * @param dimensionName - The dimension of this topk config
- * @param topk - map of metric name to k value
- */
-public class TopKDimensionToMetricsSpec {
-
-  String dimensionName;
-  Map<String, Integer> topk;
-
-  public TopKDimensionToMetricsSpec() {
-
-  }
-
-  public TopKDimensionToMetricsSpec(String dimensionName, Map<String, Integer> topk) {
-    this.dimensionName = dimensionName;
-    this.topk = topk;
-  }
-
-  public String getDimensionName() {
-    return dimensionName;
-  }
-
-  public void setDimensionName(String dimensionName) {
-    this.dimensionName = dimensionName;
-  }
-
-  public Map<String, Integer> getTopk() {
-    return topk;
-  }
-
-  public void setTopk(Map<String, Integer> topk) {
-    this.topk = topk;
-  }
-
-  public String toString() {
-    return "{ dimensionName : " + dimensionName + ", topk : " + topk + " }";
-  }
-
-}
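
A sketch of the structure this spec holds, mirroring how getTopKDimensionToMetricsSpecFromProperties assembles it earlier in this diff (dimension and metric names illustrative):

    import java.util.Collections;

    import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;

    public class TopKSpecSketch {
      public static void main(String[] args) {
        // keep the top 10 values of the browser dimension, ranked by the views metric
        TopKDimensionToMetricsSpec spec =
            new TopKDimensionToMetricsSpec("browser", Collections.singletonMap("views", 10));
        System.out.println(spec); // { dimensionName : browser, topk : {views=10} }
      }
    }
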
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopkWhitelistSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopkWhitelistSpec.java
deleted file mode 100644
index 2861d40c01..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/config/TopkWhitelistSpec.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * Config class to define topk and whitelist
- *
- * @param threshold - dimension values which do not satisfy metric thresholds will be ignored.
- * The metric total contributed by a dimension will be compared with the metric total across all the records.
- *
- * @param topKDimensionToMetricsSpec - list of dimension and a map of metric to topk value for that dimension
- * Only top k values for the dimension will be picked, based on metric
- *
- * @param whitelist - values to whitelist for given dimension (dimension:whitelist values)
- *
- * @param nonWhitelistValue - value to be used for a dimension value which is not in the whitelist
- */
-public class TopkWhitelistSpec {
-
-  Map<String, Double> threshold;
-  List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec;
-  Map<String, List<String>> whitelist;
-  Map<String, String> nonWhitelistValue;
-
-  public TopkWhitelistSpec() {
-
-  }
-
-  public Map<String, Double> getThreshold() {
-    return threshold;
-  }
-
-  public void setThreshold(Map<String, Double> threshold) {
-    this.threshold = threshold;
-  }
-
-  public List<TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpec() {
-    return topKDimensionToMetricsSpec;
-  }
-
-  public void setTopKDimensionToMetricsSpec(List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec) {
-    this.topKDimensionToMetricsSpec = topKDimensionToMetricsSpec;
-  }
-
-  public Map<String, List<String>> getWhitelist() {
-    return whitelist;
-  }
-
-  public void setWhitelist(Map<String, List<String>> whitelist) {
-    this.whitelist = whitelist;
-  }
-
-  public Map<String, String> getNonWhitelistValue() {
-    return nonWhitelistValue;
-  }
-
-  public void setNonWhitelistValue(Map<String, String> nonWhitelistValue) {
-    this.nonWhitelistValue = nonWhitelistValue;
-  }
-
-}
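
Putting the pieces together, a sketch of a fully populated TopkWhitelistSpec, in the same shape that getTopKWhitelistSpec builds from the individual helpers at the top of this diff (all names and values illustrative):

    import java.util.Arrays;
    import java.util.Collections;

    import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
    import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;

    public class TopkWhitelistSketch {
      public static void main(String[] args) {
        TopkWhitelistSpec spec = new TopkWhitelistSpec();
        // metric threshold for the views metric, per the class comment above
        spec.setThreshold(Collections.singletonMap("views", 0.01));
        // top 10 browser values by views
        spec.setTopKDimensionToMetricsSpec(Collections.singletonList(
            new TopKDimensionToMetricsSpec("browser", Collections.singletonMap("views", 10))));
        // whitelist for country, with a replacement value for everything outside the whitelist
        spec.setWhitelist(Collections.singletonMap("country", Arrays.asList("us", "de")));
        spec.setNonWhitelistValue(Collections.singletonMap("country", "other_country"));
      }
    }
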
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java
deleted file mode 100644
index a90a77a6fa..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * This class contains the config needed by TopKColumnTransformation
- * and the methods to obtain the config from the ThirdEyeConfig
- */
-public class DerivedColumnTransformationPhaseConfig {
-  private List<String> dimensionNames;
-  private List<DimensionType> dimensionTypes;
-  private List<String> metricNames;
-  private List<MetricType> metricTypes;
-  private String timeColumnName;
-  private Map<String, List<String>> whitelist;
-  private Map<String, String> nonWhitelistValue;
-
-
-  public DerivedColumnTransformationPhaseConfig() {
-
-  }
-
-  /**
-   * @param dimensionNames
-   * @param dimensionTypes
-   * @param metricNames
-   * @param metricTypes
-   * @param timeColumnName
-   * @param whitelist
-   */
-  public DerivedColumnTransformationPhaseConfig(List<String> dimensionNames, List<DimensionType> dimensionTypes,
-      List<String> metricNames, List<MetricType> metricTypes, String timeColumnName,
-      Map<String, List<String>> whitelist, Map<String, String> nonWhitelistValue) {
-    super();
-    this.dimensionNames = dimensionNames;
-    this.dimensionTypes = dimensionTypes;
-    this.metricNames = metricNames;
-    this.metricTypes = metricTypes;
-    this.timeColumnName = timeColumnName;
-    this.whitelist = whitelist;
-    this.nonWhitelistValue = nonWhitelistValue;
-  }
-
-  public List<String> getDimensionNames() {
-    return dimensionNames;
-  }
-
-  public List<DimensionType> getDimensionTypes() {
-    return dimensionTypes;
-  }
-
-  public List<String> getMetricNames() {
-    return metricNames;
-  }
-
-  public List<MetricType> getMetricTypes() {
-    return metricTypes;
-  }
-
-  public String getTimeColumnName() {
-    return timeColumnName;
-  }
-
-  public Map<String, List<String>> getWhitelist() {
-    return whitelist;
-  }
-
-  public Map<String, String> getNonWhitelistValue() {
-    return nonWhitelistValue;
-  }
-
-  public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
-
-    // metrics
-    List<String> metricNames = new ArrayList<>(config.getMetrics().size());
-    List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
-    for (MetricSpec spec : config.getMetrics()) {
-      metricNames.add(spec.getName());
-      metricTypes.add(spec.getType());
-    }
-
-    // dimensions
-    List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
-    List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
-    for (DimensionSpec spec : config.getDimensions()) {
-      dimensionNames.add(spec.getName());
-      dimensionTypes.add(spec.getDimensionType());
-    }
-
-    // time
-    String timeColumnName = config.getTime().getColumnName();
-
-    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
-    Map<String, List<String>> whitelist = new HashMap<>();
-
-    // topkwhitelist
-    if (topKWhitelist != null && topKWhitelist.getWhitelist() != null) {
-      whitelist.putAll(topKWhitelist.getWhitelist());
-    }
-
-    Map<String, String> nonWhitelistValueMap = new HashMap<>();
-    if (topKWhitelist != null && topKWhitelist.getNonWhitelistValue() != null) {
-      nonWhitelistValueMap.putAll(topKWhitelist.getNonWhitelistValue());
-    }
-
-    return new DerivedColumnTransformationPhaseConfig(dimensionNames, dimensionTypes, metricNames, metricTypes,
-        timeColumnName, whitelist, nonWhitelistValueMap);
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java
deleted file mode 100644
index 151a853749..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-/**
- * This class contains the properties to be set for topk column transformation phase
- */
-public enum DerivedColumnTransformationPhaseConstants {
-  DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH("derived.column.transformation.phase.input.path"),
-  DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH("derived.column.transformation.phase.topk.path"),
-  DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH("derived.column.transformation.phase.output.path"),
-  DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA("derived.column.transformation.phase.output.schema"),
-  DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG("derived.column.transformation.phase.thirdeye.config");
-
-  String name;
-
-  DerivedColumnTransformationPhaseConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java
deleted file mode 100644
index 88dbe2c6bd..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java
+++ /dev/null
@@ -1,403 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA;
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG;
-import static com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH;
-
-import java.io.DataInput;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
-import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.topk.TopKDimensionValues;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-import org.apache.avro.Schema;
-import org.apache.avro.SchemaBuilder;
-import org.apache.avro.SchemaBuilder.BaseFieldTypeBuilder;
-import org.apache.avro.SchemaBuilder.FieldAssembler;
-import org.apache.avro.SchemaBuilder.RecordBuilder;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroJob;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyOutputFormat;
-import org.apache.avro.mapreduce.AvroMultipleOutputs;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-/**
- * This phase will add a new column for every column that has topk config
- * The new column added will be called "column_topk" (containing only topk values plus any whitelist)
- * and "column" will contain all values with whitelist applied
- * For all non topk values, the dimension value will be replaced by "other"
- * For all non-whitelist values, the dimension value will be replaced by the defaultOtherValue specified in DimensionType
- * This default other value can be configured, using config like thirdeye.nonwhitelist.value.dimension.d1=x
- */
-public class DerivedColumnTransformationPhaseJob extends Configured {
-  private static final Logger LOGGER = LoggerFactory.getLogger(DerivedColumnTransformationPhaseJob.class);
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-  private String name;
-  private Properties props;
-
-  /**
-   * @param name
-   * @param props
-   */
-  public DerivedColumnTransformationPhaseJob(String name, Properties props) {
-    super(new Configuration());
-    this.name = name;
-    this.props = props;
-  }
-
-  public static class DerivedColumnTransformationPhaseMapper
-      extends Mapper<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> {
-
-    private Schema outputSchema;
-    private ThirdEyeConfig thirdeyeConfig;
-    private DerivedColumnTransformationPhaseConfig config;
-    private List<String> dimensionsNames;
-    private List<DimensionType> dimensionsTypes;
-    private List<String> metricNames;
-    private List<MetricType> metricTypes;
-    private TopKDimensionValues topKDimensionValues;
-    private Map<String, Set<String>> topKDimensionsMap;
-    private Map<String, List<String>> whitelist;
-    private Map<String, String> nonWhitelistValueMap;
-    private String timeColumnName;
-
-    private AvroMultipleOutputs avroMultipleOutputs;
-    String inputFileName;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-      LOGGER.info("DerivedColumnTransformationPhaseJob.DerivedColumnTransformationPhaseMapper.setup()");
-      Configuration configuration = context.getConfiguration();
-      FileSystem fs = FileSystem.get(configuration);
-
-      FileSplit fileSplit = (FileSplit) context.getInputSplit();
-      inputFileName = fileSplit.getPath().getName();
-      inputFileName = inputFileName.substring(0, inputFileName.lastIndexOf(ThirdEyeConstants.AVRO_SUFFIX));
-      LOGGER.info("split name:" + inputFileName);
-
-      thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-      config = DerivedColumnTransformationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-      dimensionsNames = config.getDimensionNames();
-      dimensionsTypes = config.getDimensionTypes();
-      metricNames = config.getMetricNames();
-      metricTypes = config.getMetricTypes();
-      timeColumnName = config.getTimeColumnName();
-      whitelist = config.getWhitelist();
-      nonWhitelistValueMap = config.getNonWhitelistValue();
-
-      outputSchema = new Schema.Parser().parse(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString()));
-
-      Path topKPath = new Path(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString())
-          + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE);
-      topKDimensionValues = new TopKDimensionValues();
-      if (fs.exists(topKPath)) {
-        FSDataInputStream topkValuesStream = fs.open(topKPath);
-        topKDimensionValues = OBJECT_MAPPER.readValue((DataInput) topkValuesStream, TopKDimensionValues.class);
-        topkValuesStream.close();
-      }
-      topKDimensionsMap = topKDimensionValues.getTopKDimensions();
-
-      avroMultipleOutputs = new AvroMultipleOutputs(context);
-    }
-
-
-    @Override
-    public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
-        throws IOException, InterruptedException {
-
-      // input record
-      GenericRecord inputRecord = key.datum();
-
-      // output record
-      GenericRecord outputRecord = new Record(outputSchema);
-
-      // dimensions
-      for (int i = 0; i < dimensionsNames.size(); i++) {
-
-        String dimensionName = dimensionsNames.get(i);
-        DimensionType dimensionType = dimensionsTypes.get(i);
-        Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
-        String dimensionValueStr = String.valueOf(dimensionValue);
-
-
-        // add original dimension value with whitelist applied
-        Object whitelistDimensionValue = dimensionValue;
-        if (whitelist != null) {
-          List<String> whitelistDimensions = whitelist.get(dimensionName);
-          if (CollectionUtils.isNotEmpty(whitelistDimensions)) {
-            // whitelist config exists for this dimension but value not present in whitelist
-            if (!whitelistDimensions.contains(dimensionValueStr)) {
-              whitelistDimensionValue = dimensionType.getValueFromString(nonWhitelistValueMap.get(dimensionName));
-            }
-          }
-        }
-        outputRecord.put(dimensionName, whitelistDimensionValue);
-
-        // add column for topk, if topk config exists for that column, plus any whitelist values
-        if (topKDimensionsMap.containsKey(dimensionName)) {
-          Set<String> topKDimensionValues = topKDimensionsMap.get(dimensionName);
-          // if topk config exists for that dimension
-          if (CollectionUtils.isNotEmpty(topKDimensionValues)) {
-            String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
-            Object topkDimensionValue = dimensionValue;
-            // topk config exists for this dimension, but value not present in topk or whitelist
-            if (!topKDimensionValues.contains(dimensionValueStr) &&
-                (whitelist == null || whitelist.get(dimensionName) == null
-                || !whitelist.get(dimensionName).contains(dimensionValueStr))) {
-              topkDimensionValue = ThirdEyeConstants.OTHER;
-            }
-            outputRecord.put(topkDimensionName, String.valueOf(topkDimensionValue));
-          }
-        }
-      }
-
-      // metrics
-      for (int i = 0; i < metricNames.size(); i ++) {
-        String metricName = metricNames.get(i);
-        MetricType metricType = metricTypes.get(i);
-        outputRecord.put(metricName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName, metricType));
-      }
-
-      // time
-      outputRecord.put(timeColumnName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName));
-
-      AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
-      avroMultipleOutputs.write(outputKey, NullWritable.get(), inputFileName);
-    }
-
-    @Override
-    public void cleanup(Context context) throws IOException, InterruptedException {
-      avroMultipleOutputs.close();
-    }
-
-
-  }
-
-  public Job run() throws Exception {
-    Job job = Job.getInstance(getConf());
-    job.setJobName(name);
-    job.setJarByClass(DerivedColumnTransformationPhaseJob.class);
-
-    Configuration configuration = job.getConfiguration();
-    FileSystem fs = FileSystem.get(configuration);
-
-    // Input Path
-    String inputPathDir = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH);
-    LOGGER.info("Input path dir: " + inputPathDir);
-    for (String inputPath : inputPathDir.split(",")) {
-      LOGGER.info("Adding input:" + inputPath);
-      Path input = new Path(inputPath);
-      FileInputFormat.addInputPath(job, input);
-    }
-
-    // Topk path
-    String topkPath = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH);
-    LOGGER.info("Topk path : " + topkPath);
-
-    // Output path
-    Path outputPath = new Path(getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH));
-    LOGGER.info("Output path dir: " + outputPath.toString());
-    if (fs.exists(outputPath)) {
-      fs.delete(outputPath, true);
-    }
-    FileOutputFormat.setOutputPath(job, outputPath);
-
-    // Schema
-    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
-    LOGGER.info("Schema : {}", avroSchema.toString(true));
-
-    // ThirdEyeConfig
-    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
-    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
-    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(),
-        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
-
-    // New schema
-    Schema outputSchema = newSchema(thirdeyeConfig);
-    job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(), outputSchema.toString());
-
-    // Map config
-    job.setMapperClass(DerivedColumnTransformationPhaseMapper.class);
-    job.setInputFormatClass(AvroKeyInputFormat.class);
-    job.setMapOutputKeyClass(AvroKey.class);
-    job.setMapOutputValueClass(NullWritable.class);
-    AvroJob.setOutputKeySchema(job, outputSchema);
-    LazyOutputFormat.setOutputFormatClass(job, AvroKeyOutputFormat.class);
-    AvroMultipleOutputs.addNamedOutput(job, "avro", AvroKeyOutputFormat.class, outputSchema);
-
-    job.setNumReduceTasks(0);
-
-    job.waitForCompletion(true);
-
-    return job;
-  }
-
-
-  public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
-    Schema outputSchema = null;
-
-    Set<String> topKTransformDimensionSet = new HashSet<>();
-    TopkWhitelistSpec topkWhitelist = thirdeyeConfig.getTopKWhitelist();
-
-    // gather topk columns
-    if (topkWhitelist != null) {
-      List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecs = topkWhitelist.getTopKDimensionToMetricsSpec();
-      if (topKDimensionToMetricsSpecs != null) {
-        for (TopKDimensionToMetricsSpec topKDimensionToMetricsSpec : topKDimensionToMetricsSpecs) {
-          topKTransformDimensionSet.add(topKDimensionToMetricsSpec.getDimensionName());
-        }
-      }
-    }
-    RecordBuilder<Schema> recordBuilder = SchemaBuilder.record(thirdeyeConfig.getCollection());
-    FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
-
-    // add new column for topk columns
-    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
-      String dimensionName = dimensionSpec.getName();
-      DimensionType dimensionType = dimensionSpec.getDimensionType();
-      BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(dimensionName).type().nullable();
-
-      switch (dimensionType) {
-      case DOUBLE:
-        fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
-        break;
-      case FLOAT:
-        fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
-        break;
-      case INT:
-      case SHORT:
-        fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
-        break;
-      case LONG:
-        fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
-        break;
-      case STRING:
-        fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
-        break;
-      default:
-        throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
-      }
-      if (topKTransformDimensionSet.contains(dimensionName)) {
-        fieldAssembler = fieldAssembler.name(dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX).type().nullable().stringType().noDefault();
-      }
-    }
-
-    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
-      String metric = metricSpec.getName();
-      MetricType metricType = metricSpec.getType();
-      BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(metric).type().nullable();
-
-      switch (metricType) {
-        case SHORT:
-        case INT:
-          fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
-          break;
-        case FLOAT:
-          fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
-          break;
-        case DOUBLE:
-          fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
-          break;
-        case LONG:
-        default:
-          fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
-      }
-    }
-
-    String timeColumnName = thirdeyeConfig.getTime().getColumnName();
-    fieldAssembler = fieldAssembler.name(timeColumnName).type().longType().noDefault();
-
-    outputSchema = fieldAssembler.endRecord();
-    LOGGER.info("New schema {}", outputSchema.toString(true));
-
-    return outputSchema;
-  }
-
-  private String getAndSetConfiguration(Configuration configuration,
-      DerivedColumnTransformationPhaseConstants constant) {
-    String value = getAndCheck(constant.toString());
-    configuration.set(constant.toString(), value);
-    return value;
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " required property");
-    }
-    return propValue;
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 1) {
-      throw new IllegalArgumentException("usage: config.properties");
-    }
-
-    Properties props = new Properties();
-    props.load(new FileInputStream(args[0]));
-    DerivedColumnTransformationPhaseJob job = new DerivedColumnTransformationPhaseJob("derived_column_transformation_job", props);
-    job.run();
-  }
-
-}
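
A sketch of driving the job above programmatically, equivalent to its main() with a config.properties file; the three phase path keys are the ones run() requires via getAndSetConfiguration, and the HDFS locations are illustrative:

    import java.util.Properties;

    import com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseJob;

    public class DerivedColumnPhaseDriver {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // phase paths required by run(); values are illustrative
        props.setProperty("derived.column.transformation.phase.input.path", "/thirdeye/pageViews/input");
        props.setProperty("derived.column.transformation.phase.topk.path", "/thirdeye/pageViews/topk");
        props.setProperty("derived.column.transformation.phase.output.path", "/thirdeye/pageViews/derived");
        // the thirdeye.* table/dimension/metric/time properties from the earlier ThirdEyeConfig
        // sketch must also be present; run() fills in dimension and metric types from the Avro
        // schema before calling ThirdEyeConfig.fromProperties
        new DerivedColumnTransformationPhaseJob("derived_column_transformation_job", props).run();
      }
    }
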
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinConfigUDF.java
deleted file mode 100644
index f13e579d65..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinConfigUDF.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import org.apache.hadoop.mapreduce.Job;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultJoinConfigUDF implements JoinConfigUDF {
-  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultJoinConfigUDF.class);
-
-  @Override
-  public void setJoinConfig(Job job) {
-
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java
deleted file mode 100644
index 461f16ed1b..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.util.Map;
-
-import org.apache.avro.generic.GenericRecord;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultJoinKeyExtractor implements JoinKeyExtractor {
-  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultJoinKeyExtractor.class);
-
-  private Map<String, String> joinKeyMap;
-  private String defaultJoinKey;
-
-  public DefaultJoinKeyExtractor(Map<String, String> params) {
-    this.joinKeyMap = params;
-    this.defaultJoinKey = params.get("defaultJoinKey");
-  }
-
-  @Override
-  public String extractJoinKey(String sourceName, GenericRecord record) {
-
-    String joinKey = defaultJoinKey;
-    if (joinKeyMap != null && joinKeyMap.containsKey(sourceName)) {
-      joinKey = joinKeyMap.get(sourceName);
-    }
-    String ret = "INVALID";
-    if (joinKey != null) {
-      Object object = record.get(joinKey);
-      if (object != null) {
-        ret = object.toString();
-      }
-    }
-    LOGGER.info("source:{} JoinKey:{} value:{}", sourceName, joinKey, ret);
-    return ret;
-  }
-
-}
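
A sketch of the extractor above applied to a hand-built Avro record; the defaultJoinKey parameter name comes from the constructor, while the source name, field name and schema are illustrative:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.avro.Schema;
    import org.apache.avro.SchemaBuilder;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericRecord;

    import com.linkedin.thirdeye.hadoop.join.DefaultJoinKeyExtractor;

    public class JoinKeySketch {
      public static void main(String[] args) {
        Map<String, String> params = new HashMap<>();
        params.put("defaultJoinKey", "memberId"); // used when a source has no explicit mapping
        params.put("clickEvents", "viewerId");    // per-source override for the clickEvents source
        DefaultJoinKeyExtractor extractor = new DefaultJoinKeyExtractor(params);

        Schema schema = SchemaBuilder.record("clickEvents").fields()
            .name("viewerId").type().stringType().noDefault()
            .endRecord();
        GenericRecord record = new GenericData.Record(schema);
        record.put("viewerId", "12345");

        System.out.println(extractor.extractJoinKey("clickEvents", record)); // 12345
      }
    }
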
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java
deleted file mode 100644
index 0ca9266d56..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyRecordReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.codehaus.jackson.JsonParseException;
-import org.codehaus.jackson.map.JsonMappingException;
-import org.codehaus.jackson.map.ObjectMapper;
-
-import org.codehaus.jackson.type.TypeReference;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DelegatingAvroKeyInputFormat<T> extends AvroKeyInputFormat<T> {
-  private static final Logger LOGGER = LoggerFactory.getLogger(DelegatingAvroKeyInputFormat.class);
-  private static TypeReference MAP_STRING_STRING_TYPE = new TypeReference<Map<String, String>>() {
-  };
-
-  public org.apache.hadoop.mapreduce.RecordReader<org.apache.avro.mapred.AvroKey<T>, NullWritable> createRecordReader(
-      InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
-    LOGGER.info("DelegatingAvroKeyInputFormat.createRecordReader()  for split:{}", split);
-    FileSplit fileSplit = (FileSplit) split;
-    Configuration configuration = context.getConfiguration();
-    String sourceName = getSourceNameFromPath(fileSplit, configuration);
-    LOGGER.info("Source Name for path {} : {}", fileSplit.getPath(), sourceName);
-    Map<String, String> schemaJSONMapping = new ObjectMapper()
-        .readValue(configuration.get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
-
-    LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
-
-    String sourceSchemaJSON = schemaJSONMapping.get(sourceName);
-
-    Schema schema = new Schema.Parser().parse(sourceSchemaJSON);
-    return new AvroKeyRecordReader<T>(schema);
-  }
-
-  public static String getSourceNameFromPath(FileSplit fileSplit, Configuration configuration)
-      throws IOException, JsonParseException, JsonMappingException {
-    String content = configuration.get("schema.path.mapping");
-    Map<String, String> schemaPathMapping =
-        new ObjectMapper().readValue(content, MAP_STRING_STRING_TYPE);
-    LOGGER.info("Schema Path Mapping: {}", schemaPathMapping);
-
-    String sourceName = null;
-    for (String path : schemaPathMapping.keySet()) {
-      if (fileSplit.getPath().toString().indexOf(path) > -1) {
-        sourceName = schemaPathMapping.get(path);
-        break;
-      }
-    }
-    return sourceName;
-  }
-}
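
The input format removed above resolves each split's source name and Avro schema from two JSON maps stored in the job configuration ("schema.path.mapping" and "schema.json.mapping"). A minimal sketch of how those entries could be populated, assuming hypothetical source names, paths, and schemas (JoinPhaseJob normally writes them during job setup):

// Minimal sketch: populating the two JSON mappings consumed by DelegatingAvroKeyInputFormat.
// Source names, paths and schema strings below are hypothetical placeholders.
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import com.fasterxml.jackson.databind.ObjectMapper;

public class SchemaMappingExample {
  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    Configuration conf = new Configuration();

    // input path -> source name, used by getSourceNameFromPath()
    Map<String, String> pathMapping = new HashMap<>();
    pathMapping.put("hdfs://nn/data/profile/2018-01-01", "profile");
    pathMapping.put("hdfs://nn/data/activity/2018-01-01", "activity");
    conf.set("schema.path.mapping", mapper.writeValueAsString(pathMapping));

    // source name -> Avro schema JSON, used by createRecordReader()
    Map<String, String> schemaMapping = new HashMap<>();
    schemaMapping.put("profile", "{\"type\":\"record\",\"name\":\"profile\",\"fields\":[]}");
    schemaMapping.put("activity", "{\"type\":\"record\",\"name\":\"activity\",\"fields\":[]}");
    conf.set("schema.json.mapping", mapper.writeValueAsString(schemaMapping));
  }
}
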
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDF.java
deleted file mode 100644
index e4312feaa7..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDF.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.linkedin.thirdeye.hadoop.join.GenericJoinUDFConfig.Field;
-
-public class GenericJoinUDF implements JoinUDF {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(GenericJoinUDF.class);
-  private GenericJoinUDFConfig config;
-  private Schema outputSchema;
-  private List<Field> fields;
-
-  public GenericJoinUDF(Map<String, String> params) {
-    LOGGER.info("Initializing GenericJoinUDF with params:" + params);
-    this.config = new GenericJoinUDFConfig(params);
-    fields = config.getFields();
-  }
-
-  @Override
-  public void init(Schema outputSchema) {
-    this.outputSchema = outputSchema;
-  }
-
-  /**
-   * Trivial implementation of a generic join udf. Assumes the data type is the
-   * same in source and output.
-   */
-  @Override
-  public List<GenericRecord> performJoin(Object joinKeyVal,
-      Map<String, List<GenericRecord>> joinInput) {
-
-    List<GenericRecord> outputRecords = new ArrayList<GenericRecord>();
-    GenericRecord outputRecord = new GenericData.Record(outputSchema);
-    for (Field field : fields) {
-      Object value = null;
-      // try to find the field in one of the source events, break out as soon as
-      // we find a non null value
-      for (String source : field.sourceEvents) {
-        List<GenericRecord> list = joinInput.get(source);
-        if (list != null && list.size() >= 1) {
-          for (GenericRecord record : list) {
-            value = record.get(field.name);
-            if (value != null) {
-              break;
-            }
-          }
-        }
-        if (value != null) {
-          break;
-        }
-      }
-      if (value != null) {
-        outputRecord.put(field.name, value);
-      }
-    }
-    outputRecords.add(outputRecord);
-    return outputRecords;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDFConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDFConfig.java
deleted file mode 100644
index 0d9b6d4cde..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/GenericJoinUDFConfig.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.Schema.Type;
-
-public class GenericJoinUDFConfig {
-
-  List<Field> fields;
-
-  public GenericJoinUDFConfig(Map<String, String> params) {
-    fields = new ArrayList<Field>();
-    String fieldNamesString = params.get("field.names");
-    String[] split = fieldNamesString.split(",");
-    for (String fieldName : split) {
-      Field field = new Field();
-      field.name = fieldName;
-      String type = params.get(fieldName + ".type");
-      if (type != null) {
-        field.type = Schema.Type.valueOf(type.toUpperCase());
-      }
-      field.sourceEvents = new ArrayList<String>();
-      String[] fieldSources = params.get(fieldName + ".sources").split(",");
-      for (String fieldSource : fieldSources) {
-        field.sourceEvents.add(fieldSource.trim());
-      }
-      fields.add(field);
-    }
-  }
-
-  public List<Field> getFields() {
-    return fields;
-  }
-
-  public void setFields(List<Field> fields) {
-    this.fields = fields;
-  }
-
-  /*
-   * For now, only the field name and source names are supported. It would be nice
-   * to support data type conversion and transform functions in the future.
-   */
-  public static class Field {
-    String name;
-    List<String> sourceEvents;
-    Schema.Type type;
-    List<String> tranformFunc;
-
-    public String getName() {
-      return name;
-    }
-
-    public void setName(String name) {
-      this.name = name;
-    }
-
-    public Type getType() {
-      return type;
-    }
-
-    public void setType(Type type) {
-      this.type = type;
-    }
-
-    public List<String> getSourceEvents() {
-      return sourceEvents;
-    }
-
-    public void setSourceEvents(List<String> sourceEvents) {
-      this.sourceEvents = sourceEvents;
-    }
-
-    public List<String> getTranformFunc() {
-      return tranformFunc;
-    }
-
-    public void setTranformFunc(List<String> tranformFunc) {
-      this.tranformFunc = tranformFunc;
-    }
-  }
-}
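
The configuration removed above is driven entirely by a flat parameter map: a comma-separated "field.names" entry plus an optional "<field>.type" and a required "<field>.sources" entry per field. A minimal sketch of such a map, with hypothetical field and source names:

// Minimal sketch of the parameter map that GenericJoinUDFConfig parses.
// Field and source names are hypothetical.
import java.util.HashMap;
import java.util.Map;

public class GenericJoinUDFConfigExample {
  public static void main(String[] args) {
    Map<String, String> params = new HashMap<>();
    params.put("field.names", "memberId,country,pageViews");
    params.put("memberId.type", "LONG");                 // optional, maps to Schema.Type
    params.put("memberId.sources", "profile,activity");  // searched in order for a non-null value
    params.put("country.sources", "profile");
    params.put("pageViews.type", "INT");
    params.put("pageViews.sources", "activity");

    GenericJoinUDFConfig config = new GenericJoinUDFConfig(params);
    System.out.println(config.getFields().size()); // 3
  }
}

GenericJoinUDF then copies, for each configured field, the first non-null value found across the listed sources into the output record.
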
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinConfigUDF.java
deleted file mode 100644
index 5ece8007ec..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinConfigUDF.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * Simple interface to apply custom configuration to the join Job
- */
-public interface JoinConfigUDF {
-
-  /**
-   * @param job the Hadoop job to configure before submission
-   */
-  void setJoinConfig(Job job);
-}
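
JoinPhaseJob instantiates the interface removed above through a no-argument constructor and calls setJoinConfig(job) before submission, which is the hook for job-level customization such as distributed-cache entries. A minimal sketch, assuming a hypothetical property name and cache file path:

// Minimal sketch of a JoinConfigUDF implementation. The property name and the
// cache file path are hypothetical; the distributed-cache entry is one example
// of the kind of job-level setup this hook is meant for.
import java.net.URI;
import org.apache.hadoop.mapreduce.Job;

public class ExampleJoinConfigUDF implements JoinConfigUDF {

  @Override
  public void setJoinConfig(Job job) {
    job.getConfiguration().set("example.lookup.enabled", "true");
    try {
      // Ship a side file to all tasks via the distributed cache.
      job.addCacheFile(new URI("hdfs://nn/lookup/country-codes.txt"));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
}
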
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinKeyExtractor.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinKeyExtractor.java
deleted file mode 100644
index 7a6c0f1860..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinKeyExtractor.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import org.apache.avro.generic.GenericRecord;
-
-/**
- * Simple interface to extract the joinKey from a Generic Record
- */
-public interface JoinKeyExtractor {
-  /**
-   * @param sourceName name of the source
-   * @param record record from which the join key is extracted; the join key value is
-   *          expected to be a string.
-   * @return the extracted join key as a string
-   */
-  String extractJoinKey(String sourceName, GenericRecord record);
-}
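
A minimal sketch of an implementation of the extractor removed above: it pulls a per-source key field out of each record and returns "INVALID" when the field is missing, which causes GenericJoinMapper to drop the record. The field names are hypothetical; JoinPhaseJob constructs the extractor reflectively with a Map<String, String> of per-source configuration:

// Minimal sketch of a JoinKeyExtractor. Field names are hypothetical.
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.generic.GenericRecord;

public class ExampleJoinKeyExtractor implements JoinKeyExtractor {
  private final Map<String, String> sourceToKeyField = new HashMap<>();

  public ExampleJoinKeyExtractor(Map<String, String> params) {
    // JoinPhaseJob passes {source -> join.key.extractor.config value}
    sourceToKeyField.putAll(params);
  }

  @Override
  public String extractJoinKey(String sourceName, GenericRecord record) {
    String keyField = sourceToKeyField.getOrDefault(sourceName, "memberId");
    Object value = record.get(keyField);
    return value == null ? "INVALID" : value.toString();
  }
}
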
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseConstants.java
deleted file mode 100644
index 52a0ef4509..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseConstants.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-public enum JoinPhaseConstants {
-  // SCHEMA AND INPUT PER SOURCE actual property access would be {source}.join.input.path
-  JOIN_INPUT_SCHEMA("join.input.schema"), // one schema for each source
-  JOIN_INPUT_PATH("join.input.path"), // one input for each source
-  JOIN_OUTPUT_PATH("join.output.path"),
-  JOIN_OUTPUT_SCHEMA("join.output.schema"),
-  JOIN_SOURCE_NAMES("join.source.names"), // comma separated list of sources
-  JOIN_CONFIG_UDF_CLASS("join.config.udf.class"),
-  JOIN_UDF_CLASS("join.udf.class"),
-  JOIN_KEY_EXTRACTOR_CLASS("join.key.extractor.class"),
-  JOIN_KEY_EXTRACTOR_CONFIG("join.key.extractor.config"), // one for each source
-  JOIN_UDF_CONFIG("join.udf.config"); // one for each source
-
-  String name;
-
-  JoinPhaseConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
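
These constants translate into job properties, with the per-source entries prefixed by the source name as noted in the comments above. A minimal sketch of a property set that JoinPhaseJob would accept; the source names, paths, and UDF class names are hypothetical, while GenericJoinUDF is the implementation shipped with this module:

// Minimal sketch of the job properties read through JoinPhaseConstants.
// Paths, source names and the example UDF classes are hypothetical.
import java.util.Properties;

public class JoinPhasePropertiesExample {
  public static Properties build() {
    Properties props = new Properties();
    props.setProperty("join.source.names", "profile,activity");

    // per-source input path and schema, prefixed with the source name
    props.setProperty("profile.join.input.path", "/data/profile/2018-01-01");
    props.setProperty("profile.join.input.schema", "/schemas/profile.avsc");
    props.setProperty("activity.join.input.path", "/data/activity/2018-01-01");
    props.setProperty("activity.join.input.schema", "/schemas/activity.avsc");

    props.setProperty("join.output.path", "/output/joined/2018-01-01");
    props.setProperty("join.output.schema", "/schemas/joined.avsc");

    props.setProperty("join.config.udf.class", "com.example.ExampleJoinConfigUDF");
    props.setProperty("join.key.extractor.class", "com.example.ExampleJoinKeyExtractor");
    props.setProperty("join.udf.class", "com.linkedin.thirdeye.hadoop.join.GenericJoinUDF");

    props.setProperty("num.reducers", "10"); // optional, defaults to 10
    return props;
  }
}
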
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseJob.java
deleted file mode 100644
index b13d1ec11a..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinPhaseJob.java
+++ /dev/null
@@ -1,394 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import static com.linkedin.thirdeye.hadoop.join.JoinPhaseConstants.*;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.StringWriter;
-import java.lang.reflect.Constructor;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroJob;
-import org.apache.avro.mapreduce.AvroKeyOutputFormat;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.CounterGroup;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.collect.Lists;
-
-/**
- * This is a generic join job that can be used to prepare the data for Third
- * Eye. Many teams just need a way to join multiple data sets into one.
- * Currently they do this with a Pig script, which is highly inefficient since
- * it performs pairwise joins. The idea is as follows: there are N named sources,
- * and there is a join key common across all these sources. <br/>
- * S1: join key s1_key <br/>
- * S2: join key s2_key <br/>
- * ... <br/>
- * SN: join key sn_key<br/>
- */
-public class JoinPhaseJob extends Configured {
-  private static final Logger LOGGER = LoggerFactory.getLogger(JoinPhaseJob.class);
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-  private String name;
-  private Properties props;
-
-  public JoinPhaseJob(String name, Properties props) {
-    super(new Configuration());
-    this.name = name;
-    this.props = props;
-  }
-
-  public static class GenericJoinMapper
-      extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
-    String sourceName;
-    JoinKeyExtractor joinKeyExtractor;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-
-      LOGGER.info("GenericAvroJoinJob.GenericJoinMapper.setup()");
-      FileSplit fileSplit = (FileSplit) context.getInputSplit();
-      LOGGER.info("split name:" + fileSplit.toString());
-      Configuration configuration = context.getConfiguration();
-
-      try {
-        sourceName = DelegatingAvroKeyInputFormat.getSourceNameFromPath(fileSplit, configuration);
-        LOGGER.info("Input: {} belongs to Source:{}", fileSplit, sourceName);
-        String joinKeyExtractorClass = configuration.get(JOIN_KEY_EXTRACTOR_CLASS.toString());
-
-        Map<String, String> params = new HashMap<>();
-        List<String> sourceNames = Lists.newArrayList(configuration.get(JOIN_SOURCE_NAMES.toString()).split(","));
-        for (String sourceName : sourceNames) {
-          String joinKeyExtractorConfig = configuration.get(sourceName + "." + JOIN_KEY_EXTRACTOR_CONFIG.toString());
-          if (StringUtils.isNotBlank(joinKeyExtractorConfig)) {
-            params.put(sourceName, joinKeyExtractorConfig);
-          }
-        }
-        LOGGER.info("Initializing JoinKeyExtractorClass:{} with params:{}", joinKeyExtractorClass, params);
-        Constructor<?> constructor = Class.forName(joinKeyExtractorClass).getConstructor(Map.class);
-        joinKeyExtractor = (JoinKeyExtractor) constructor.newInstance(params);
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-
-    }
-
-    @Override
-    public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
-        throws IOException, InterruptedException {
-      GenericRecord record = recordWrapper.datum();
-      MapOutputValue mapOutputValue = new MapOutputValue(record.getSchema().getName(), record);
-      String joinKeyValue = joinKeyExtractor.extractJoinKey(sourceName, record);
-      LOGGER.info("Join Key:{}", joinKeyValue);
-
-      if (!"INVALID".equals(joinKeyValue)) {
-        context.write(new BytesWritable(joinKeyValue.toString().getBytes()),
-            new BytesWritable(mapOutputValue.toBytes()));
-      }
-    }
-
-  }
-
-  public static class GenericJoinReducer
-      extends Reducer<BytesWritable, BytesWritable, AvroKey<GenericRecord>, NullWritable> {
-
-    String statOutputDir;
-    private FileSystem fileSystem;
-    private static TypeReference MAP_STRING_STRING_TYPE = new TypeReference<Map<String, String>>() {
-    };
-    private Map<String, Schema> schemaMap = new HashMap<String, Schema>();
-    private JoinUDF joinUDF;
-    private Map<String, AtomicInteger> countersMap = new HashMap<String, AtomicInteger>();
-    private List<String> sourceNames;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-      Configuration configuration = context.getConfiguration();
-      fileSystem = FileSystem.get(configuration);
-
-      try {
-
-        Map<String, String> schemaJSONMapping = new ObjectMapper().readValue(
-            context.getConfiguration().get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
-
-        LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
-        for (String sourceName : schemaJSONMapping.keySet()) {
-          Schema schema = new Schema.Parser().parse(schemaJSONMapping.get(sourceName));
-          schemaMap.put(sourceName, schema);
-        }
-        sourceNames = Lists.newArrayList(configuration.get(JOIN_SOURCE_NAMES.toString()).split(","));
-        String joinUDFClass = configuration.get(JOIN_UDF_CLASS.toString());
-        Map<String, String> params = new HashMap<>();
-        for (String sourceName : sourceNames) {
-          String joinUdfConfig = configuration.get(sourceName + "." + JOIN_UDF_CONFIG.toString());
-          if (StringUtils.isNotBlank(joinUdfConfig)) {
-            params.put(sourceName, joinUdfConfig);
-          }
-        }
-
-        Constructor<?> constructor = Class.forName(joinUDFClass).getConstructor(Map.class);
-        LOGGER.info("Initializing JoinUDFClass:{} with params:{}", joinUDFClass, params);
-        joinUDF = (JoinUDF) constructor.newInstance(params);
-        String outputSchemaPath = configuration.get(JOIN_OUTPUT_SCHEMA.toString());
-        // Avro schema
-        Schema.Parser parser = new Schema.Parser();
-        Schema outputSchema = parser.parse(fileSystem.open(new Path(outputSchemaPath)));
-        LOGGER.info("Setting outputschema:{}", outputSchema);
-        joinUDF.init(outputSchema);
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-    }
-
-    @Override
-    public void reduce(BytesWritable joinKeyWritable, Iterable<BytesWritable> recordBytesWritable,
-        Context context) throws IOException, InterruptedException {
-      Map<String, List<GenericRecord>> joinInput = new HashMap<String, List<GenericRecord>>();
-      for (BytesWritable writable : recordBytesWritable) {
-
-        byte[] bytes = writable.copyBytes();
-        MapOutputValue mapOutputValue = MapOutputValue.fromBytes(bytes, schemaMap);
-        String schemaName = mapOutputValue.getSchemaName();
-        if (!joinInput.containsKey(schemaName)) {
-          joinInput.put(schemaName, new ArrayList<GenericRecord>());
-        }
-        joinInput.get(schemaName).add(mapOutputValue.getRecord());
-      }
-
-      int[] exists = new int[sourceNames.size()];
-      for (int i = 0; i < sourceNames.size(); i++) {
-        String source = sourceNames.get(i);
-        if (joinInput.containsKey(source)) {
-          exists[i] = 1;
-        } else {
-          exists[i] = 0;
-        }
-      }
-      String counterName = Arrays.toString(exists);
-      if (!countersMap.containsKey(counterName)) {
-        countersMap.put(counterName, new AtomicInteger(0));
-      }
-      countersMap.get(counterName).incrementAndGet();
-      // invoke the udf and pass in the join data
-      List<GenericRecord> outputRecords =
-          joinUDF.performJoin(new String(joinKeyWritable.copyBytes()), joinInput);
-      if (outputRecords != null) {
-        for (GenericRecord outputRecord : outputRecords) {
-          context.write(new AvroKey<GenericRecord>(outputRecord), NullWritable.get());
-        }
-      }
-    }
-
-    protected void cleanup(Context context) throws IOException, InterruptedException {
-      for (String counterName : countersMap.keySet()) {
-        context.getCounter("DynamicCounter", counterName)
-            .increment(countersMap.get(counterName).get());
-      }
-    }
-  }
-
-  public Job run() throws Exception {
-    Job job = Job.getInstance(getConf());
-    Configuration conf = job.getConfiguration();
-    job.setJobName(name);
-    job.setJarByClass(JoinPhaseJob.class);
-
-    FileSystem fs = FileSystem.get(conf);
-
-    String outputSchemaPath = getAndSetConfiguration(conf, JOIN_OUTPUT_SCHEMA);
-    Schema.Parser parser = new Schema.Parser();
-    Schema outputSchema = parser.parse(fs.open(new Path(outputSchemaPath)));
-    LOGGER.info("{}", outputSchema);
-
-    // Set custom config like adding distributed caches
-    String joinConfigUDFClass = getAndSetConfiguration(conf, JoinPhaseConstants.JOIN_CONFIG_UDF_CLASS);
-    LOGGER.info("Initializing JoinConfigUDFClass:{} with params:{}", joinConfigUDFClass);
-    Constructor<?> constructor = Class.forName(joinConfigUDFClass).getConstructor();
-    JoinConfigUDF joinConfigUDF = (JoinConfigUDF) constructor.newInstance();
-    joinConfigUDF.setJoinConfig(job);
-    getAndSetConfiguration(conf, JOIN_KEY_EXTRACTOR_CLASS);
-    getAndSetConfiguration(conf, JOIN_UDF_CLASS);
-
-    List<String> sourceNames = Lists.newArrayList(
-        getAndSetConfiguration(conf, JoinPhaseConstants.JOIN_SOURCE_NAMES).split(","));
-
-    // Map config
-    job.setMapperClass(GenericJoinMapper.class);
-    // AvroJob.setInputKeySchema(job, unionSchema);
-    job.setInputFormatClass(DelegatingAvroKeyInputFormat.class);
-    job.setMapOutputKeyClass(BytesWritable.class);
-    job.setMapOutputValueClass(BytesWritable.class);
-
-    // Reduce config
-    job.setReducerClass(GenericJoinReducer.class);
-    AvroJob.setOutputKeySchema(job, outputSchema);
-    job.setOutputFormatClass(AvroKeyOutputFormat.class);
-    job.setOutputKeyClass(AvroKey.class);
-    job.setOutputValueClass(NullWritable.class);
-
-    String numReducers = props.getProperty("num.reducers");
-    if (numReducers != null) {
-      job.setNumReduceTasks(Integer.parseInt(numReducers));
-    } else {
-      job.setNumReduceTasks(10);
-    }
-    LOGGER.info("Setting number of reducers : " + job.getNumReduceTasks());
-    Map<String, String> schemaMap = new HashMap<String, String>();
-    Map<String, String> schemaPathMapping = new HashMap<String, String>();
-
-    for (String sourceName : sourceNames) {
-      // load schema for each source
-      LOGGER.info("Loading Schema for {}", sourceName);
-
-      FSDataInputStream schemaStream =
-          fs.open(new Path(getAndCheck(sourceName + "." + JOIN_INPUT_SCHEMA.toString())));
-      Schema schema = new Schema.Parser().parse(schemaStream);
-      schemaMap.put(sourceName, schema.toString());
-      LOGGER.info("Schema for {}:  \n{}", sourceName, schema);
-
-      // configure input data for each source
-      String inputPathDir = getAndCheck(sourceName + "." + JOIN_INPUT_PATH.toString());
-      LOGGER.info("Input path dir for " + sourceName + ": " + inputPathDir);
-      for (String inputPath : inputPathDir.split(",")) {
-        Path input = new Path(inputPath);
-        FileStatus[] listFiles = fs.listStatus(input);
-        boolean isNested = false;
-        for (FileStatus fileStatus : listFiles) {
-          if (fileStatus.isDirectory()) {
-            isNested = true;
-            Path path = fileStatus.getPath();
-            LOGGER.info("Adding input:" + path);
-            FileInputFormat.addInputPath(job, path);
-            schemaPathMapping.put(path.toString(), sourceName);
-          }
-        }
-        if (!isNested) {
-          LOGGER.info("Adding input:" + inputPath);
-          FileInputFormat.addInputPath(job, input);
-          schemaPathMapping.put(input.toString(), sourceName);
-        }
-      }
-    }
-    StringWriter temp = new StringWriter();
-    OBJECT_MAPPER.writeValue(temp, schemaPathMapping);
-    job.getConfiguration().set("schema.path.mapping", temp.toString());
-
-    temp = new StringWriter();
-    OBJECT_MAPPER.writeValue(temp, schemaMap);
-    job.getConfiguration().set("schema.json.mapping", temp.toString());
-
-    Path outputPath = new Path(getAndCheck(JOIN_OUTPUT_PATH.toString()));
-    if (fs.exists(outputPath)) {
-      fs.delete(outputPath, true);
-    }
-    FileOutputFormat.setOutputPath(job, new Path(getAndCheck(JOIN_OUTPUT_PATH.toString())));
-
-    for (Object key : props.keySet()) {
-      conf.set(key.toString(), props.getProperty(key.toString()));
-    }
-
-    job.waitForCompletion(true);
-
-    dumpSummary(job, sourceNames);
-
-    return job;
-  }
-
-  private void dumpSummary(Job job, List<String> sourceNames) throws IOException {
-    System.out.println("Join Input Matrix.");
-    CounterGroup group = job.getCounters().getGroup("DynamicCounter");
-    for (String source : sourceNames) {
-      System.out.print(String.format("%25s\t", source));
-    }
-    if (group != null) {
-      Iterator<Counter> iterator = group.iterator();
-      while (iterator.hasNext()) {
-        Counter counter = iterator.next();
-        String displayName = counter.getDisplayName();
-        String[] split = displayName.replace("[", "").replace("]", "").split(",");
-        for (String str : split) {
-          if (str.trim().equals("1")) {
-            System.out.print(String.format("%25s\t", "1"));
-          } else {
-            System.out.print(String.format("%25s\t", "-"));
-          }
-        }
-      }
-    }
-  }
-
-  private String getAndSetConfiguration(Configuration configuration,
-      JoinPhaseConstants constant) {
-    String value = getAndCheck(constant.toString());
-    configuration.set(constant.toString(), value);
-    return value;
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " is a required property");
-    }
-    return propValue;
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 1) {
-      throw new IllegalArgumentException("usage: config.properties");
-    }
-
-    Properties props = new Properties();
-    props.load(new FileInputStream(args[0]));
-
-    JoinPhaseJob job = new JoinPhaseJob("aggregate_avro_job", props);
-    job.run();
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinUDF.java
deleted file mode 100644
index c36dfdd045..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/JoinUDF.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.util.List;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-
-public interface JoinUDF {
-  /**
-   * Initializes by providing the output schema.
-   * @param outputSchema
-   */
-  void init(Schema outputSchema);
-
-  /**
-   * @param joinKeyVal common key used to join all the sources
-   * @param joinInput mapping from source name to the GenericRecord(s) that share the key
-   * @return the joined output record(s) for this key
-   */
-  List<GenericRecord> performJoin(Object joinKeyVal, Map<String, List<GenericRecord>> joinInput);
-
-}
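
A minimal sketch of a custom implementation of the interface removed above: it emits one output record per join key carrying the key and a per-source record count. The output field names ("joinKey", "recordCount") are hypothetical and must exist in the configured output schema; the Map constructor matches how JoinPhaseJob instantiates UDFs reflectively:

// Minimal sketch of a custom JoinUDF. Output field names are hypothetical.
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class CountingJoinUDF implements JoinUDF {
  private Schema outputSchema;

  // JoinPhaseJob instantiates UDFs through a Map<String, String> constructor.
  public CountingJoinUDF(Map<String, String> params) {
  }

  @Override
  public void init(Schema outputSchema) {
    this.outputSchema = outputSchema;
  }

  @Override
  public List<GenericRecord> performJoin(Object joinKeyVal, Map<String, List<GenericRecord>> joinInput) {
    int count = 0;
    for (List<GenericRecord> records : joinInput.values()) {
      count += records.size();
    }
    GenericRecord out = new GenericData.Record(outputSchema);
    out.put("joinKey", joinKeyVal.toString());
    out.put("recordCount", count);
    return Collections.singletonList(out);
  }
}
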
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputKey.java
deleted file mode 100644
index 5d168b5766..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputKey.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-public class MapOutputKey {
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputValue.java
deleted file mode 100644
index a7c77838f5..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/join/MapOutputValue.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.join;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.io.BinaryDecoder;
-import org.apache.avro.io.BinaryEncoder;
-import org.apache.avro.io.DecoderFactory;
-import org.apache.avro.io.EncoderFactory;
-
-public class MapOutputValue {
-
-  private static BinaryDecoder binaryDecoder;
-  private String schemaName;
-  private GenericRecord record;
-  private GenericDatumWriter<GenericRecord> WRITER;
-  private EncoderFactory factory = EncoderFactory.get();
-
-  private BinaryEncoder binaryEncoder;
-
-  public MapOutputValue(String schemaName, GenericRecord record) {
-    this.schemaName = schemaName;
-    this.record = record;
-  }
-
-  public String getSchemaName() {
-    return schemaName;
-  }
-
-  public GenericRecord getRecord() {
-    return record;
-  }
-
-  public byte[] toBytes() throws IOException {
-    ByteArrayOutputStream dataStream = new ByteArrayOutputStream();
-    Schema schema = record.getSchema();
-    if (WRITER == null) {
-      WRITER = new GenericDatumWriter<GenericRecord>(schema);
-    }
-    binaryEncoder = factory.directBinaryEncoder(dataStream, binaryEncoder);
-    WRITER.write(record, binaryEncoder);
-
-    // serialize to bytes, we also need to know the schema name when we
-    // process this record on the reducer since reducer gets the record from
-    // multiple mappers. So we first write the schema/source name and then
-    // write the serialized bytes
-    ByteArrayOutputStream out = new ByteArrayOutputStream();
-    DataOutputStream dos = new DataOutputStream(out);
-    dos.writeInt(schema.getName().getBytes().length);
-    dos.write(schema.getName().getBytes());
-    byte[] dataBytes = dataStream.toByteArray();
-
-    dos.writeInt(dataBytes.length);
-    dos.write(dataBytes);
-    return out.toByteArray();
-  }
-
-  public static MapOutputValue fromBytes(byte[] bytes, Map<String, Schema> schemaMap)
-      throws IOException {
-    DataInputStream dataInputStream = new DataInputStream(new ByteArrayInputStream(bytes));
-    int length = dataInputStream.readInt();
-    byte[] sourceNameBytes = new byte[length];
-    dataInputStream.read(sourceNameBytes);
-    String schemaName = new String(sourceNameBytes);
-
-    int recordDataLength = dataInputStream.readInt();
-
-    byte[] recordBytes = new byte[recordDataLength];
-    dataInputStream.read(recordBytes);
-    Schema schema = schemaMap.get(schemaName);
-    GenericRecord record = new GenericData.Record(schema);
-    binaryDecoder = DecoderFactory.get().binaryDecoder(recordBytes, binaryDecoder);
-    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
-    gdr.read(record, binaryDecoder);
-    return new MapOutputValue(schemaName, record);
-  }
-
-}
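
The serialized layout produced above is a length-prefixed schema name followed by a length-prefixed Avro-encoded record, which is why fromBytes() needs a schema-name-to-Schema map on the reducer side. A minimal round-trip sketch using a hypothetical one-field schema:

// Minimal round-trip sketch for MapOutputValue; the schema is hypothetical.
import java.util.Collections;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class MapOutputValueExample {
  public static void main(String[] args) throws Exception {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"profile\",\"fields\":["
            + "{\"name\":\"memberId\",\"type\":\"long\"}]}");

    GenericRecord record = new GenericData.Record(schema);
    record.put("memberId", 42L);

    // serialize on the mapper side, deserialize on the reducer side
    byte[] bytes = new MapOutputValue(schema.getName(), record).toBytes();
    MapOutputValue decoded =
        MapOutputValue.fromBytes(bytes, Collections.singletonMap(schema.getName(), schema));
    System.out.println(decoded.getSchemaName() + " -> " + decoded.getRecord());
  }
}
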
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DefaultSegmentPushUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DefaultSegmentPushUDF.java
deleted file mode 100644
index 1cfca7f1af..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DefaultSegmentPushUDF.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.util.Properties;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultSegmentPushUDF implements SegmentPushUDF {
-  private static final Logger LOG = LoggerFactory.getLogger(DefaultSegmentPushUDF.class);
-
-  @Override
-  public void emitCustomEvents(Properties properties) {
-    // do nothing
-    LOG.info("Default segment push udf");
-  }
-
-}
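
The default implementation above is a no-op hook. A minimal sketch of a custom SegmentPushUDF, which SegmentPushPhase instantiates by class name and invokes with the job properties (including the push start/end timestamps it records) after a successful upload; the implementation here only logs, and any real event emission is left as an assumption:

// Minimal sketch of a custom SegmentPushUDF; it only logs the properties.
import java.util.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AuditingSegmentPushUDF implements SegmentPushUDF {
  private static final Logger LOG = LoggerFactory.getLogger(AuditingSegmentPushUDF.class);

  @Override
  public void emitCustomEvents(Properties properties) {
    // e.g. publish an audit event to a tracking system; here we only log.
    LOG.info("Segment push completed with properties: {}", properties);
  }
}
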
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java
deleted file mode 100644
index 21c77afcdd..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.TreeSet;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.helix.AccessOption;
-import org.apache.helix.BaseDataAccessor;
-import org.apache.helix.HelixDataAccessor;
-import org.apache.helix.PropertyKey;
-import org.apache.helix.PropertyKey.Builder;
-import org.apache.helix.ZNRecord;
-import org.apache.helix.manager.zk.ZKHelixDataAccessor;
-import org.apache.helix.manager.zk.ZNRecordSerializer;
-import org.apache.helix.manager.zk.ZkBaseDataAccessor;
-import org.apache.helix.manager.zk.ZkClient;
-import org.apache.helix.model.ExternalView;
-import org.apache.helix.model.IdealState;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DeleteOverlappingSegmentsInPinot {
-
-  private static final Logger LOG = LoggerFactory.getLogger(DeleteOverlappingSegmentsInPinot.class);
-
-  public static void main(String[] args) throws Exception {
-    String zkUrl = args[0];
-    String zkCluster = args[1];
-    String tableName = args[2];
-    deleteOverlappingSegments(zkUrl, zkCluster, tableName);
-  }
-
-  private static IdealState computeNewIdealStateAfterDeletingOverlappingSegments(HelixDataAccessor helixDataAccessor, PropertyKey idealStatesKey) {
-    IdealState is = helixDataAccessor.getProperty(idealStatesKey);
-    // compute existing DAILY segments
-    Set<String> daysWithDailySegments = new HashSet<>();
-    for (String segmentName : is.getPartitionSet()) {
-      LOG.info("Segment Name : {}", segmentName);
-      if (segmentName.indexOf("DAILY") > -1) {
-        String[] splits = segmentName.split("_");
-        String endDay = splits[splits.length - 2].substring(0, "yyyy-mm-dd".length());
-        String startDay = splits[splits.length - 3].substring(0, "yyyy-mm-dd".length());
-        LOG.info("Start : {} End : {}", startDay, endDay);
-        daysWithDailySegments.add(startDay);
-      }
-    }
-    // compute list of HOURLY segments to be deleted
-    Set<String> hourlySegmentsToDelete = new TreeSet<>();
-    for (String segmentName : is.getPartitionSet()) {
-      LOG.info("Segment name {}", segmentName);
-      if (segmentName.indexOf("HOURLY") > -1) {
-        String[] splits = segmentName.split("_");
-        String endDay = splits[splits.length - 2].substring(0, "yyyy-mm-dd".length());
-        String startDay = splits[splits.length - 3].substring(0, "yyyy-mm-dd".length());
-        LOG.info("Start : {} End : {}", startDay, endDay);
-        if (daysWithDailySegments.contains(startDay)) {
-          hourlySegmentsToDelete.add(segmentName);
-        }
-      }
-    }
-    LOG.info("HOURLY segments that can be deleted: {}", hourlySegmentsToDelete.size());
-    LOG.info("Hourly segments to delete {}", hourlySegmentsToDelete.toString().replaceAll(",", "\n"));
-    IdealState newIdealState = new IdealState(is.getRecord());
-    for (String hourlySegmentToDelete : hourlySegmentsToDelete) {
-      newIdealState.getRecord().getMapFields().remove(hourlySegmentToDelete);
-    }
-    return newIdealState;
-  }
-
-  public static boolean deleteOverlappingSegments(String zkUrl, String zkCluster, String tableName) {
-    boolean updateSuccessful = false;
-
-    if (!tableName.endsWith("_OFFLINE")) {
-      tableName = tableName + "_OFFLINE";
-    }
-
-    ZkClient zkClient = new ZkClient(zkUrl);
-    ZNRecordSerializer zkSerializer = new ZNRecordSerializer();
-    zkClient.setZkSerializer(zkSerializer);
-    BaseDataAccessor<ZNRecord> baseDataAccessor = new ZkBaseDataAccessor<>(zkClient);
-    HelixDataAccessor helixDataAccessor = new ZKHelixDataAccessor(zkCluster, baseDataAccessor);
-    Builder keyBuilder = helixDataAccessor.keyBuilder();
-    PropertyKey idealStateKey = keyBuilder.idealStates(tableName);
-    PropertyKey externalViewKey = keyBuilder.externalView(tableName);
-    IdealState currentIdealState = helixDataAccessor.getProperty(idealStateKey);
-    byte[] serializeIS = zkSerializer.serialize(currentIdealState.getRecord());
-    String name = tableName + ".idealstate." + System.currentTimeMillis();
-    File outputFile = new File("/tmp", name);
-
-    try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) {
-      IOUtils.write(serializeIS, fileOutputStream);
-    } catch (IOException e) {
-      LOG.error("Exception in delete overlapping segments", e);
-      return updateSuccessful;
-    }
-    LOG.info("Saved current idealstate to {}", outputFile);
-    IdealState newIdealState;
-    do {
-      newIdealState = computeNewIdealStateAfterDeletingOverlappingSegments(helixDataAccessor, idealStateKey);
-      LOG.info("Updating IdealState");
-      updateSuccessful = helixDataAccessor.getBaseDataAccessor().set(idealStateKey.getPath(), newIdealState.getRecord(), newIdealState.getRecord().getVersion(), AccessOption.PERSISTENT);
-      if (updateSuccessful) {
-        int numSegmentsDeleted = currentIdealState.getPartitionSet().size() - newIdealState.getPartitionSet().size();
-        LOG.info("Successfully updated IdealState: Removed segments: {}", (numSegmentsDeleted));
-      }
-    } while (!updateSuccessful);
-
-    try {
-      while (true) {
-        Thread.sleep(10000);
-        ExternalView externalView = helixDataAccessor.getProperty(externalViewKey);
-        IdealState idealState = helixDataAccessor.getProperty(idealStateKey);
-        Set<String> evPartitionSet = externalView.getPartitionSet();
-        Set<String> isPartitionSet = idealState.getPartitionSet();
-        if (evPartitionSet.equals(isPartitionSet)) {
-          LOG.info("Table {} has reached stable state. i.e segments in external view match idealstates", tableName);
-          break;
-        }
-      }
-    } catch (InterruptedException e) {
-      e.printStackTrace();
-    }
-    return updateSuccessful;
-  }
-}
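
A minimal usage sketch for the helper removed above; the ZooKeeper URL, cluster, and table name are hypothetical. The "_OFFLINE" suffix is appended automatically when missing, and the call blocks until the external view matches the updated ideal state:

// Minimal usage sketch; connection details and table name are hypothetical.
public class DeleteOverlappingSegmentsExample {
  public static void main(String[] args) throws Exception {
    boolean updated = DeleteOverlappingSegmentsInPinot.deleteOverlappingSegments(
        "zk-host:2181", "PinotCluster", "thirdeyeMetrics");
    System.out.println("Ideal state updated: " + updated);
  }
}
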
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIs.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIs.java
deleted file mode 100644
index 264c36936b..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIs.java
+++ /dev/null
@@ -1,225 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.HttpClient;
-import org.apache.http.client.methods.HttpDelete;
-import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.DefaultHttpClient;
-import org.apache.http.util.EntityUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Joiner;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-
-/**
- * Contains APIs which can be used for segment operations
- * such as listing, deleting overlap
- */
-public class SegmentPushControllerAPIs {
-
-  private static Logger LOGGER = LoggerFactory.getLogger(SegmentPushControllerAPIs.class);
-  private String[] controllerHosts;
-  private int controllerPort;
-  private HttpHost controllerHttpHost;
-
-  private static final String OFFLINE_SEGMENTS = "OFFLINE";
-  private static String DAILY_SCHEDULE = "DAILY";
-  private static String HOURLY_SCHEDULE = "HOURLY";
-  private static String SEGMENTS_ENDPOINT = "/segments/";
-  private static String TABLES_ENDPOINT = "/tables/";
-  private static String TYPE_PARAMETER = "?type=offline";
-  private static String UTF_8 = "UTF-8";
-  private static long TIMEOUT = 120000;
-  private static String DATE_JOINER = "-";
-
-  SegmentPushControllerAPIs(String[] controllerHosts, String controllerPort) {
-    this.controllerHosts = controllerHosts;
-    this.controllerPort = Integer.valueOf(controllerPort);
-  }
-
-  public void deleteOverlappingSegments(String tableName, String segmentName) throws IOException {
-    if (segmentName.contains(DAILY_SCHEDULE)) {
-      for (String controllerHost : controllerHosts) {
-        controllerHttpHost = new HttpHost(controllerHost, controllerPort);
-
-        LOGGER.info("Getting overlapped segments for {}*************", segmentName);
-        List<String> overlappingSegments = getOverlappingSegments(tableName, segmentName);
-
-        if (overlappingSegments.isEmpty()) {
-          LOGGER.info("No overlapping segments found");
-        } else {
-          LOGGER.info("Deleting overlapped segments****************");
-          deleteOverlappingSegments(tableName, overlappingSegments);
-        }
-      }
-    } else {
-      LOGGER.info("No overlapping segments to delete for HOURLY");
-    }
-  }
-
-  private List<String> getOverlappingSegments(String tablename, String segmentName) throws IOException {
-
-    List<String> overlappingSegments = new ArrayList<>();
-    String pattern = getOverlapPattern(segmentName, tablename);
-    if (pattern != null) {
-      LOGGER.info("Finding segments overlapping to {} with pattern {}", segmentName, pattern);
-      List<String> allSegments = getAllSegments(tablename, segmentName);
-      overlappingSegments = getOverlappingSegments(allSegments, pattern);
-    }
-    return overlappingSegments;
-  }
-
-  public List<String> getOverlappingSegments(List<String> allSegments, String pattern) {
-    List<String> overlappingSegments = new ArrayList<>();
-    for (String segment : allSegments) {
-      if (segment.startsWith(pattern)) {
-        LOGGER.info("Found overlapping segment {}", segment);
-        overlappingSegments.add(segment);
-      }
-    }
-    return overlappingSegments;
-  }
-
-  public String getOverlapPattern(String segmentName, String tablename) {
-    String pattern = null;
-    // segment name format: table[_*]Name_schedule_startDate_endDate
-    String[] tokens = segmentName.split(ThirdEyeConstants.SEGMENT_JOINER);
-    int size = tokens.length;
-    if (size > 3) {
-      String startDateToken = tokens[size - 2];
-      if (startDateToken.lastIndexOf(DATE_JOINER) != -1) {
-        String datePrefix = startDateToken.substring(0, startDateToken.lastIndexOf(DATE_JOINER));
-        pattern = Joiner.on(ThirdEyeConstants.SEGMENT_JOINER).join(tablename, HOURLY_SCHEDULE, datePrefix);
-      }
-    }
-    return pattern;
-  }
-
-  private List<String> getAllSegments(String tablename, String segmentName) throws IOException {
-    List<String> allSegments = new ArrayList<>();
-
-    HttpClient controllerClient = new DefaultHttpClient();
-    HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tablename, UTF_8));
-    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
-    try {
-      if (res.getStatusLine().getStatusCode() != 200) {
-        throw new IllegalStateException(res.getStatusLine().toString());
-      }
-      InputStream content = res.getEntity().getContent();
-      JsonNode segmentsData = new ObjectMapper().readTree(content);
-
-      if (segmentsData != null) {
-        JsonNode offlineSegments = segmentsData.get(0).get(OFFLINE_SEGMENTS);
-        if (offlineSegments != null) {
-          for (JsonNode segment : offlineSegments) {
-            allSegments.add(segment.asText());
-          }
-        }
-      }
-      LOGGER.info("All segments : {}", allSegments);
-    } finally {
-      if (res.getEntity() != null) {
-        EntityUtils.consume(res.getEntity());
-      }
-    }
-    return allSegments;
-  }
-
-  private boolean isDeleteSuccessful(String tablename, String segmentName) throws IOException {
-
-    boolean deleteSuccessful = false;
-    HttpClient controllerClient = new DefaultHttpClient();
-    // this endpoint gets from ideal state
-    HttpGet req = new HttpGet(TABLES_ENDPOINT + URLEncoder.encode(tablename, UTF_8) + SEGMENTS_ENDPOINT);
-    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
-    try {
-      if (res.getStatusLine().getStatusCode() != 200) {
-        throw new IllegalStateException(res.getStatusLine().toString());
-      }
-      InputStream content = res.getEntity().getContent();
-      String response = IOUtils.toString(content);
-      LOGGER.info("All segments from ideal state {}", response);
-      String decoratedSegmentName = "\\\""+segmentName+"\\\"";
-      LOGGER.info("Decorated segment name {}", decoratedSegmentName);
-      if (!response.contains(decoratedSegmentName)) {
-        deleteSuccessful = true;
-        LOGGER.info("Delete successful");
-      } else {
-        LOGGER.info("Delete failed");
-      }
-    } finally {
-      if (res.getEntity() != null) {
-        EntityUtils.consume(res.getEntity());
-      }
-
-    }
-    return deleteSuccessful;
-
-  }
-
-
-  private void deleteOverlappingSegments(String tablename, List<String> overlappingSegments) throws IOException {
-
-    for (String segment : overlappingSegments) {
-      boolean deleteSuccessful = false;
-      long elapsedTime = 0;
-      long startTimeMillis = System.currentTimeMillis();
-      while (elapsedTime < TIMEOUT && !deleteSuccessful) {
-        deleteSuccessful = deleteSegment(tablename, segment);
-        LOGGER.info("Response {} while deleting segment {} from table {}", deleteSuccessful, segment, tablename);
-        long currentTimeMillis = System.currentTimeMillis();
-        elapsedTime = elapsedTime + (currentTimeMillis - startTimeMillis);
-      }
-    }
-  }
-
-  private boolean deleteSegment(String tablename, String segmentName) throws IOException {
-    boolean deleteSuccessful = false;
-
-    HttpClient controllerClient = new DefaultHttpClient();
-    HttpDelete req = new HttpDelete(SEGMENTS_ENDPOINT + URLEncoder.encode(tablename, UTF_8) + "/"
-        + URLEncoder.encode(segmentName, UTF_8)
-        + TYPE_PARAMETER);
-    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
-    try {
-      if (res == null || res.getStatusLine() == null || res.getStatusLine().getStatusCode() != 200
-          || !isDeleteSuccessful(tablename, segmentName)) {
-        LOGGER.info("Exception in deleting segment, trying again {}", res);
-      } else {
-        deleteSuccessful = true;
-      }
-    } finally {
-      if (res.getEntity() != null) {
-        EntityUtils.consume(res.getEntity());
-      }
-    }
-    return deleteSuccessful;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhase.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhase.java
deleted file mode 100644
index 0d01a4863a..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhase.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.push;
-
-import static com.linkedin.thirdeye.hadoop.push.SegmentPushPhaseConstants.*;
-
-import com.linkedin.pinot.common.utils.SimpleHttpResponse;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.linkedin.pinot.common.utils.FileUploadDownloadClient;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-
-/**
- * This class pushes Pinot segments generated by the segment creation phase
- * onto the Pinot cluster
- */
-public class SegmentPushPhase  extends Configured {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(SegmentPushPhase.class);
-  private final String name;
-  private final Properties props;
-  private String[] hosts;
-  private String port;
-  private String tablename;
-  private boolean uploadSuccess = true;
-  private String segmentName = null;
-  private String segmentPushUDFClass;
-  SegmentPushControllerAPIs segmentPushControllerAPIs;
-
-
-  public SegmentPushPhase(String jobName, Properties properties) throws Exception {
-    super(new Configuration());
-    name = jobName;
-    props = properties;
-  }
-
-  public void run() throws Exception {
-    Configuration configuration = new Configuration();
-    FileSystem fs = FileSystem.get(configuration);
-
-    long startTime = System.currentTimeMillis();
-
-    String segmentPath = getAndSetConfiguration(configuration, SEGMENT_PUSH_INPUT_PATH);
-    LOGGER.info("Segment path : {}", segmentPath);
-    hosts = getAndSetConfiguration(configuration, SEGMENT_PUSH_CONTROLLER_HOSTS).split(ThirdEyeConstants.FIELD_SEPARATOR);
-    port = getAndSetConfiguration(configuration, SEGMENT_PUSH_CONTROLLER_PORT);
-    tablename = getAndCheck(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString());
-    segmentPushUDFClass = props.getProperty(SEGMENT_PUSH_UDF_CLASS.toString(), DefaultSegmentPushUDF.class.getCanonicalName());
-
-    Path path = new Path(segmentPath);
-    FileStatus[] fileStatusArr = fs.globStatus(path);
-    for (FileStatus fileStatus : fileStatusArr) {
-      if (fileStatus.isDirectory()) {
-        pushDir(fs, fileStatus.getPath());
-      } else {
-        pushOneTarFile(fs, fileStatus.getPath());
-      }
-    }
-    long endTime = System.currentTimeMillis();
-
-    if (uploadSuccess && segmentName != null) {
-      props.setProperty(SEGMENT_PUSH_START_TIME.toString(), String.valueOf(startTime));
-      props.setProperty(SEGMENT_PUSH_END_TIME.toString(), String.valueOf(endTime));
-
-      segmentPushControllerAPIs = new SegmentPushControllerAPIs(hosts, port);
-      LOGGER.info("Deleting segments overlapping to {} from table {}  ", segmentName, tablename);
-      segmentPushControllerAPIs.deleteOverlappingSegments(tablename, segmentName);
-
-      try {
-        LOGGER.info("Initializing SegmentPushUDFClass:{}", segmentPushUDFClass);
-        Constructor<?> constructor = Class.forName(segmentPushUDFClass).getConstructor();
-        SegmentPushUDF segmentPushUDF = (SegmentPushUDF) constructor.newInstance();
-        segmentPushUDF.emitCustomEvents(props);
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-    }
-
-  }
-
-  public void pushDir(FileSystem fs, Path path) throws Exception {
-    LOGGER.info("******** Now uploading segments tar from dir: {}", path);
-    FileStatus[] fileStatusArr = fs.listStatus(new Path(path.toString() + "/"));
-    for (FileStatus fileStatus : fileStatusArr) {
-      if (fileStatus.isDirectory()) {
-        pushDir(fs, fileStatus.getPath());
-      } else {
-        pushOneTarFile(fs, fileStatus.getPath());
-      }
-    }
-  }
-
-  public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
-    String fileName = path.getName();
-    if (!fileName.endsWith(".tar.gz")) {
-      return;
-    }
-    long length = fs.getFileStatus(path).getLen();
-    try (FileUploadDownloadClient fileUploadDownloadClient = new FileUploadDownloadClient()) {
-      for (String host : hosts) {
-        try (InputStream inputStream = fs.open(path)) {
-          fileName = fileName.split(".tar.gz")[0];
-          if (fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER) != -1) {
-            segmentName = fileName.substring(0, fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER));
-          }
-          LOGGER.info("******** Uploading file: {} to Host: {} and Port: {} *******", fileName, host, port);
-          SimpleHttpResponse simpleHttpResponse = fileUploadDownloadClient.uploadSegment(
-              FileUploadDownloadClient.getUploadSegmentHttpURI(host, Integer.parseInt(port)), fileName, inputStream);
-          int responseCode = simpleHttpResponse.getStatusCode();
-          LOGGER.info("Response code: {}", responseCode);
-          if (responseCode != 200) {
-            uploadSuccess = false;
-          }
-        } catch (Exception e) {
-          LOGGER.error("******** Error Uploading file: {} to Host: {} and Port: {}  *******", fileName, host, port);
-          LOGGER.error("Caught exception during upload", e);
-          throw new RuntimeException("Got Error during send tar files to push hosts!");
-        }
-      }
-    }
-  }
-
-
-  private String getAndSetConfiguration(Configuration configuration,
-      SegmentPushPhaseConstants constant) {
-    String value = getAndCheck(constant.toString());
-    configuration.set(constant.toString(), value);
-    return value;
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " required property");
-    }
-    return propValue;
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 1) {
-      throw new IllegalArgumentException("usage: config.properties");
-    }
-
-    Properties props = new Properties();
-    props.load(new FileInputStream(args[0]));
-
-    SegmentPushPhase job = new SegmentPushPhase("segment_push_job", props);
-    job.run();
-  }
-
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhaseConstants.java
deleted file mode 100644
index 421c0265d3..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushPhaseConstants.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.push;
-
-/**
- * Class containing properties to be set for segment push
- */
-public enum SegmentPushPhaseConstants {
-
-  SEGMENT_PUSH_INPUT_PATH("segment.push.input.path"),
-  SEGMENT_PUSH_UDF_CLASS("segment.push.udf.class"),
-  SEGMENT_PUSH_CONTROLLER_HOSTS("segment.push.controller.hosts"),
-  SEGMENT_PUSH_CONTROLLER_PORT("segment.push.controller.port"),
-  SEGMENT_PUSH_START_TIME("segment.push.start.time"),
-  SEGMENT_PUSH_END_TIME("segment.push.end.time");
-
-  String name;
-
-  SegmentPushPhaseConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
\ No newline at end of file
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushUDF.java
deleted file mode 100644
index a9e9039482..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/push/SegmentPushUDF.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.util.Properties;
-
-public interface SegmentPushUDF {
-
-  void emitCustomEvents(Properties properties);
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java
deleted file mode 100644
index fe3d171d89..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.segment.creation;
-
-/**
- * Properties to be set for the segment creation phase
- */
-public enum SegmentCreationPhaseConstants {
-
-  SEGMENT_CREATION_INPUT_PATH("segment.creation.input.path"),
-  SEGMENT_CREATION_OUTPUT_PATH("segment.creation.output.path"),
-  SEGMENT_CREATION_THIRDEYE_CONFIG("segment.creation.thirdeye.config"),
-  SEGMENT_CREATION_WALLCLOCK_START_TIME("segment.creation.wallclock.start.time"),
-  SEGMENT_CREATION_WALLCLOCK_END_TIME("segment.creation.wallclock.end.time"),
-  SEGMENT_CREATION_SCHEDULE("segment.creation.schedule"),
-  SEGMENT_CREATION_BACKFILL("segment.creation.backfill");
-
-  String name;
-
-  SegmentCreationPhaseConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
\ No newline at end of file
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java
deleted file mode 100644
index 2446e7f0e4..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java
+++ /dev/null
@@ -1,223 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.segment.creation;
-
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_INPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_OUTPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_SCHEDULE;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_THIRDEYE_CONFIG;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_END_TIME;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_START_TIME;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_BACKFILL;
-
-import java.io.FileInputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.avro.Schema;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobContext;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-/**
- * This class contains the job that generates pinot segments with star tree index
- */
-public class SegmentCreationPhaseJob extends Configured {
-
-  private static final String TEMP = "temp";
-  private static final String DEFAULT_BACKFILL = "false";
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(SegmentCreationPhaseJob.class);
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-  private final String name;
-  private final Properties props;
-
-
-  public SegmentCreationPhaseJob(String jobName, Properties properties) throws Exception {
-    super(new Configuration());
-    getConf().set("mapreduce.job.user.classpath.first", "true");
-    name = jobName;
-    props = properties;
-
-  }
-
-  public Job run() throws Exception {
-
-    Job job = Job.getInstance(getConf());
-
-    job.setJarByClass(SegmentCreationPhaseJob.class);
-    job.setJobName(name);
-
-    FileSystem fs = FileSystem.get(getConf());
-
-    Configuration configuration = job.getConfiguration();
-
-    String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
-    LOGGER.info("Input path : {}", inputSegmentDir);
-    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputSegmentDir);
-    LOGGER.info("Schema : {}", avroSchema);
-    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
-    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
-    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
-    String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
-    LOGGER.info("Output path : {}", outputDir);
-    Path stagingDir = new Path(outputDir, TEMP);
-    LOGGER.info("Staging dir : {}", stagingDir);
-    String segmentWallClockStart = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_START_TIME);
-    LOGGER.info("Segment wallclock start time : {}", segmentWallClockStart);
-    String segmentWallClockEnd = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_END_TIME);
-    LOGGER.info("Segment wallclock end time : {}", segmentWallClockEnd);
-    String schedule = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEDULE);
-    LOGGER.info("Segment schedule : {}", schedule);
-    String isBackfill = props.getProperty(SEGMENT_CREATION_BACKFILL.toString(), DEFAULT_BACKFILL);
-    configuration.set(SEGMENT_CREATION_BACKFILL.toString(), isBackfill);
-    LOGGER.info("Is Backfill : {}", configuration.get(SEGMENT_CREATION_BACKFILL.toString()));
-
-    // Create temporary directory
-    if (fs.exists(stagingDir)) {
-      LOGGER.warn("Found the temp folder, deleting it");
-      fs.delete(stagingDir, true);
-    }
-    fs.mkdirs(stagingDir);
-    fs.mkdirs(new Path(stagingDir + "/input/"));
-
-    // Create output directory
-    if (fs.exists(new Path(outputDir))) {
-      LOGGER.warn("Found the output folder deleting it");
-      fs.delete(new Path(outputDir), true);
-    }
-    fs.mkdirs(new Path(outputDir));
-
-    // Read input files
-    List<FileStatus> inputDataFiles = new ArrayList<>();
-    for (String input : inputSegmentDir.split(",")) {
-      Path inputPathPattern = new Path(input);
-      inputDataFiles.addAll(Arrays.asList(fs.listStatus(inputPathPattern)));
-    }
-    LOGGER.info("size {}", inputDataFiles.size());
-
-    try {
-      for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
-        FileStatus file = inputDataFiles.get(seqId);
-        String completeFilePath = " " + file.getPath().toString() + " " + seqId;
-        Path newOutPutFile = new Path((stagingDir + "/input/" + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
-        FSDataOutputStream stream = fs.create(newOutPutFile);
-        LOGGER.info("wrote {}", completeFilePath);
-        stream.writeUTF(completeFilePath);
-        stream.flush();
-        stream.close();
-      }
-    } catch (Exception e) {
-      LOGGER.error("Exception while reading input files ", e);
-    }
-
-    job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);
-
-    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
-      job.getConfiguration().set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
-    }
-
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
-
-    job.setMapOutputKeyClass(LongWritable.class);
-    job.setMapOutputValueClass(Text.class);
-
-    FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
-    FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));
-
-    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
-    job.getConfiguration().set(SEGMENT_CREATION_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    job.setMaxReduceAttempts(1);
-    job.setMaxMapAttempts(0);
-    job.setNumReduceTasks(0);
-    for (Object key : props.keySet()) {
-      job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
-    }
-
-    job.waitForCompletion(true);
-    if (!job.isSuccessful()) {
-      throw new RuntimeException("Job failed : " + job);
-    }
-
-    LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
-    FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
-    for (FileStatus segment : segmentArr) {
-      fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
-    }
-
-    // Delete temporary directory.
-    LOGGER.info("Cleanup the working directory.");
-    LOGGER.info("Deleting the dir: {}", stagingDir);
-    fs.delete(stagingDir, true);
-
-    return job;
-  }
-
-  private String getAndSetConfiguration(Configuration configuration,
-      SegmentCreationPhaseConstants constant) {
-    String value = getAndCheck(constant.toString());
-    configuration.set(constant.toString(), value);
-    return value;
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " required property");
-    }
-    return propValue;
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 1) {
-      throw new IllegalArgumentException("usage: config.properties");
-    }
-
-    Properties props = new Properties();
-    props.load(new FileInputStream(args[0]));
-    SegmentCreationPhaseJob job = new SegmentCreationPhaseJob("segment_creation_job", props);
-    job.run();
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java
deleted file mode 100644
index c9e8f85f61..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java
+++ /dev/null
@@ -1,320 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.segment.creation;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
-import com.google.common.base.Joiner;
-import com.linkedin.pinot.common.data.FieldSpec;
-import com.linkedin.pinot.common.data.Schema;
-import com.linkedin.pinot.common.data.StarTreeIndexSpec;
-import com.linkedin.pinot.common.data.TimeGranularitySpec.TimeFormat;
-import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
-import com.linkedin.pinot.core.data.readers.FileFormat;
-import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
-import com.linkedin.pinot.core.segment.creator.StatsCollectorConfig;
-import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
-import com.linkedin.pinot.core.segment.creator.impl.stats.LongColumnPreIndexStatsCollector;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyePinotSchemaUtils;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-import org.apache.avro.file.DataFileStream;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.joda.time.DateTime;
-import org.joda.time.format.DateTimeFormat;
-import org.joda.time.format.DateTimeFormatter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.linkedin.pinot.core.segment.creator.impl.V1Constants.MetadataKeys.Segment.*;
-import static com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.*;
-
-/**
- * Mapper class for SegmentCreation job, which sets configs required for
- * segment generation with star tree index
- */
-public class SegmentCreationPhaseMapReduceJob {
-
-  public static class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
-    private static final Logger LOGGER = LoggerFactory.getLogger(SegmentCreationPhaseMapReduceJob.class);
-    private static ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory());
-
-    private Configuration properties;
-
-    private String inputFilePath;
-    private String outputPath;
-    private String tableName;
-
-    private Path currentHdfsWorkDir;
-    private String currentDiskWorkDir;
-
-    // Temporary HDFS path for local machine
-    private String localHdfsSegmentTarPath;
-
-    private String localDiskSegmentDirectory;
-    private String localDiskSegmentTarPath;
-
-    private ThirdEyeConfig thirdeyeConfig;
-    private Schema schema;
-
-    private Long segmentWallClockStartTime;
-    private Long segmentWallClockEndTime;
-    private String segmentSchedule;
-    private boolean isBackfill;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-
-      currentHdfsWorkDir = FileOutputFormat.getWorkOutputPath(context);
-      currentDiskWorkDir = "pinot_hadoop_tmp";
-
-      // Temporary HDFS path for local machine
-      localHdfsSegmentTarPath = currentHdfsWorkDir + "/segmentTar";
-
-      // Temporary DISK path for local machine
-      localDiskSegmentDirectory = currentDiskWorkDir + "/segments/";
-      localDiskSegmentTarPath = currentDiskWorkDir + "/segmentsTar/";
-      new File(localDiskSegmentTarPath).mkdirs();
-
-      LOGGER.info("*********************************************************************");
-      LOGGER.info("Configurations : {}", context.getConfiguration().toString());
-      LOGGER.info("*********************************************************************");
-      LOGGER.info("Current HDFS working dir : {}", currentHdfsWorkDir);
-      LOGGER.info("Current DISK working dir : {}", new File(currentDiskWorkDir).getAbsolutePath());
-      LOGGER.info("*********************************************************************");
-      properties = context.getConfiguration();
-
-      outputPath = properties.get(SEGMENT_CREATION_OUTPUT_PATH.toString());
-
-      thirdeyeConfig = OBJECT_MAPPER.readValue(properties.get(SEGMENT_CREATION_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-      LOGGER.info(thirdeyeConfig.encode());
-      schema = ThirdeyePinotSchemaUtils.createSchema(thirdeyeConfig);
-      tableName = thirdeyeConfig.getCollection();
-
-      segmentWallClockStartTime = Long.valueOf(properties.get(SEGMENT_CREATION_WALLCLOCK_START_TIME.toString()));
-      segmentWallClockEndTime = Long.valueOf(properties.get(SEGMENT_CREATION_WALLCLOCK_END_TIME.toString()));
-      segmentSchedule = properties.get(SEGMENT_CREATION_SCHEDULE.toString());
-      isBackfill = Boolean.valueOf(properties.get(SEGMENT_CREATION_BACKFILL.toString()));
-    }
-
-    @Override
-    public void cleanup(Context context) throws IOException, InterruptedException {
-      FileUtils.deleteQuietly(new File(currentDiskWorkDir));
-    }
-
-    @Override
-    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-
-      String line = value.toString();
-      String[] lineSplits = line.split(" ");
-
-      LOGGER.info("*********************************************************************");
-      LOGGER.info("mapper input : {}", value);
-      LOGGER.info("Path to output : {}", outputPath);
-      LOGGER.info("Table name : {}", tableName);
-      LOGGER.info("num lines : {}", lineSplits.length);
-
-      for (String split : lineSplits) {
-        LOGGER.info("Command line : {}", split);
-      }
-      LOGGER.info("*********************************************************************");
-
-      if (lineSplits.length != 3) {
-        throw new RuntimeException("Input to the mapper is malformed, please contact the pinot team");
-      }
-      inputFilePath = lineSplits[1].trim();
-
-      LOGGER.info("*********************************************************************");
-      LOGGER.info("input data file path : {}", inputFilePath);
-      LOGGER.info("local hdfs segment tar path: {}", localHdfsSegmentTarPath);
-      LOGGER.info("local disk segment path: {}", localDiskSegmentDirectory);
-      LOGGER.info("*********************************************************************");
-
-      try {
-        createSegment(inputFilePath, schema, lineSplits[2]);
-        LOGGER.info("finished segment creation job successfully");
-      } catch (Exception e) {
-        LOGGER.error("Got exceptions during creating segments!", e);
-      }
-
-      context.write(new LongWritable(Long.parseLong(lineSplits[2])),
-          new Text(FileSystem.get(new Configuration()).listStatus(new Path(localHdfsSegmentTarPath + "/"))[0].getPath().getName()));
-      LOGGER.info("finished the job successfully");
-    }
-
-    private String createSegment(String dataFilePath, Schema schema, String seqId) throws Exception {
-      final FileSystem fs = FileSystem.get(new Configuration());
-      final Path hdfsDataPath = new Path(dataFilePath);
-      final File dataPath = new File(currentDiskWorkDir, "data");
-      if (dataPath.exists()) {
-        dataPath.delete();
-      }
-      dataPath.mkdir();
-      final Path localFilePath = new Path(dataPath + "/" + hdfsDataPath.getName());
-      fs.copyToLocalFile(hdfsDataPath, localFilePath);
-
-      LOGGER.info("Data schema is : {}", schema);
-
-      // Set segment generator config
-      LOGGER.info("*********************************************************************");
-      SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
-      segmentGeneratorConfig.setTableName(tableName);
-      segmentGeneratorConfig.setInputFilePath(new File(dataPath, hdfsDataPath.getName()).getAbsolutePath());
-      LOGGER.info("Setting input path {}", segmentGeneratorConfig.getInputFilePath());
-      segmentGeneratorConfig.setFormat(FileFormat.AVRO);
-      segmentGeneratorConfig.setSegmentNamePostfix(seqId);
-      segmentGeneratorConfig.setOutDir(localDiskSegmentDirectory);
-      LOGGER.info("Setting enableStarTreeIndex");
-      String minTime = ThirdEyeConstants.DATE_TIME_FORMATTER.print(segmentWallClockStartTime);
-      String maxTime = ThirdEyeConstants.DATE_TIME_FORMATTER.print(segmentWallClockEndTime);
-      LOGGER.info("Wall clock time : min {} max {}", minTime, maxTime);
-      LOGGER.info("isBackfill : {}", isBackfill);
-      if (isBackfill) {
-        // in case of backfill, we have to ensure that the segment name is the same as the original segment name
-        // we retain the segment name through the backfill and derived_column_transformation phases
-        // in the output files generated
-        // backfill will generate original_segment_name.avro
-        // derived_column_transformation will generate original_segment_name-m-00000.avro etc
-        String segmentName = hdfsDataPath.getName().split("-(m|r)-[0-9]{5}")[0];
-        segmentName = segmentName.split(ThirdEyeConstants.AVRO_SUFFIX)[0];
-        segmentGeneratorConfig.setSegmentName(segmentName);
-      } else {
-        String segmentName =
-            Joiner.on(ThirdEyeConstants.SEGMENT_JOINER).join(tableName, segmentSchedule, minTime, maxTime, seqId);
-        segmentGeneratorConfig.setSegmentName(segmentName);
-      }
-      LOGGER.info("Setting segment name {}", segmentGeneratorConfig.getSegmentName());
-
-
-      // Set star tree config
-      StarTreeIndexSpec starTreeIndexSpec = new StarTreeIndexSpec();
-
-      // _raw dimensions should not be in star tree split order
-      // if a dimension has a _topk column, we will include only
-      // the column with topk, and skip _raw column for materialization in star tree
-      Set<String> skipMaterializationForDimensions = new HashSet<>();
-      Set<String> transformDimensionsSet = thirdeyeConfig.getTransformDimensions();
-      LOGGER.info("Dimensions with _topk column {}", transformDimensionsSet);
-      for (String topkTransformDimension : transformDimensionsSet) {
-        skipMaterializationForDimensions.add(topkTransformDimension);
-        LOGGER.info("Adding {} to skipMaterialization set", topkTransformDimension);
-      }
-      starTreeIndexSpec.setSkipMaterializationForDimensions(skipMaterializationForDimensions);
-      LOGGER.info("Setting skipMaterializationForDimensions {}", skipMaterializationForDimensions);
-
-      if (thirdeyeConfig.getSplit() != null) {
-        starTreeIndexSpec.setMaxLeafRecords(thirdeyeConfig.getSplit().getThreshold());
-        LOGGER.info("Setting split threshold to {}", starTreeIndexSpec.getMaxLeafRecords());
-        List<String> splitOrder = thirdeyeConfig.getSplit().getOrder();
-        if (splitOrder != null) {
-          LOGGER.info("Removing from splitOrder, any dimensions which are also in skipMaterializationForDimensions");
-          splitOrder.removeAll(skipMaterializationForDimensions);
-          starTreeIndexSpec.setDimensionsSplitOrder(splitOrder);
-        }
-        LOGGER.info("Setting splitOrder {}", splitOrder);
-      }
-      segmentGeneratorConfig.enableStarTreeIndex(starTreeIndexSpec);
-      LOGGER.info("*********************************************************************");
-
-      // Set time for SIMPLE_DATE_FORMAT case
-      String sdfPrefix = TimeFormat.SIMPLE_DATE_FORMAT.toString() + ThirdEyeConstants.SDF_SEPARATOR;
-      if (thirdeyeConfig.getTime().getTimeFormat().startsWith(sdfPrefix)) {
-
-        String pattern = thirdeyeConfig.getTime().getTimeFormat().split(ThirdEyeConstants.SDF_SEPARATOR)[1];
-        DateTimeFormatter sdfFormatter = DateTimeFormat.forPattern(pattern);
-
-        File localAvroFile = new File(dataPath, hdfsDataPath.getName());
-        LongColumnPreIndexStatsCollector timeColumnStatisticsCollector =
-            getTimeColumnStatsCollector(schema, localAvroFile);
-        String startTime = timeColumnStatisticsCollector.getMinValue().toString();
-        String endTime = timeColumnStatisticsCollector.getMaxValue().toString();
-        startTime = String.valueOf(DateTime.parse(startTime, sdfFormatter).getMillis());
-        endTime = String.valueOf(DateTime.parse(endTime, sdfFormatter).getMillis());
-
-        // set start time
-        segmentGeneratorConfig.getCustomProperties().put(SEGMENT_START_TIME, startTime);
-        // set end time
-        segmentGeneratorConfig.getCustomProperties().put(SEGMENT_END_TIME, endTime);
-        // set time unit
-        segmentGeneratorConfig.setSegmentTimeUnit(TimeUnit.MILLISECONDS);
-      }
-
-      // Generate segment
-      SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
-      driver.init(segmentGeneratorConfig);
-      driver.build();
-
-      // Tar the segment directory into file.
-      String segmentName = null;
-      File localDiskSegmentDirectoryFile = new File(localDiskSegmentDirectory);
-      for (File file : localDiskSegmentDirectoryFile.listFiles()) {
-        segmentName = file.getName();
-        if (segmentName.startsWith(tableName)) {
-          break;
-        }
-      }
-      String localSegmentPath = new File(localDiskSegmentDirectory, segmentName).getAbsolutePath();
-
-      String localTarPath = localDiskSegmentTarPath + "/" + segmentName + ".tar.gz";
-      LOGGER.info("Trying to tar the segment to: {}", localTarPath);
-      TarGzCompressionUtils.createTarGzOfDirectory(localSegmentPath, localTarPath);
-      String hdfsTarPath = localHdfsSegmentTarPath + "/" + segmentName + ".tar.gz";
-
-      LOGGER.info("*********************************************************************");
-      LOGGER.info("Copy from : {} to {}", localTarPath, hdfsTarPath);
-      LOGGER.info("*********************************************************************");
-      fs.copyFromLocalFile(true, true, new Path(localTarPath), new Path(hdfsTarPath));
-      return segmentName;
-    }
-
-    private LongColumnPreIndexStatsCollector getTimeColumnStatsCollector(Schema schema, File localAvroFile)
-        throws FileNotFoundException, IOException {
-      String timeColumnName = schema.getTimeColumnName();
-      FieldSpec spec =  schema.getTimeFieldSpec();
-      LOGGER.info("Spec for " + timeColumnName + " is " + spec);
-      LongColumnPreIndexStatsCollector timeColumnStatisticsCollector = new LongColumnPreIndexStatsCollector(spec.getName(), new StatsCollectorConfig(schema, null));
-      LOGGER.info("StatsCollector :" + timeColumnStatisticsCollector);
-      DataFileStream<GenericRecord> dataStream =
-          new DataFileStream<GenericRecord>(new FileInputStream(localAvroFile), new GenericDatumReader<GenericRecord>());
-      while (dataStream.hasNext()) {
-        GenericRecord next = dataStream.next();
-        timeColumnStatisticsCollector.collect(next.get(timeColumnName));
-      }
-      dataStream.close();
-      timeColumnStatisticsCollector.seal();
-
-      return timeColumnStatisticsCollector;
-    }
-
-  }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPair.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPair.java
deleted file mode 100644
index 434b71a21d..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPair.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-/**
- * Class to manage dimension value and metric value pairs
- * The order of elements is determined based on the metric value -
- * Lesser metric value is treated as greater in ordering,
- * so that it gets removed from the fixed size PriorityQueue first
- */
-public class DimensionValueMetricPair implements Comparable<DimensionValueMetricPair>{
-
-  private Object dimensionValue;
-  private Number metricValue;
-
-  public DimensionValueMetricPair(Object dimensionValue, Number metricValue) {
-    this.dimensionValue = dimensionValue;
-    this.metricValue = metricValue;
-  }
-
-  public Object getDimensionValue() {
-    return dimensionValue;
-  }
-  public void setDimensionValue(Object dimensionValue) {
-    this.dimensionValue = dimensionValue;
-  }
-  public Number getMetricValue() {
-    return metricValue;
-  }
-  public void setMetricValue(Number metricValue) {
-    this.metricValue = metricValue;
-  }
-
-
-  @Override
-  public int compareTo(DimensionValueMetricPair other) {
-    return other.metricValue.intValue() - this.metricValue.intValue();
-  }
-
-  @Override
-  public String toString() {
-    return "[" + dimensionValue + "=" + metricValue + "]";
-  }
-
-
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKDimensionValues.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKDimensionValues.java
deleted file mode 100644
index 8286d704f8..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKDimensionValues.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-/**
- * Class to create and manage top k values for every dimension
- */
-public class TopKDimensionValues {
-  private Map<String, Set<String>> topKDimensions;
-
-  public TopKDimensionValues() {
-    topKDimensions = new HashMap<>();
-  }
-
-  public Map<String, Set<String>> getTopKDimensions() {
-    return topKDimensions;
-  }
-
-  public void setTopKDimensions(Map<String, Set<String>> topKDimensions) {
-    this.topKDimensions = topKDimensions;
-  }
-
-  /**
-   * Add a top k value for a dimension
-   * @param dimension
-   * @param value
-   */
-  public void addValue(String dimension, String value) {
-    if (topKDimensions.get(dimension) == null) {
-      topKDimensions.put(dimension, new HashSet<String>());
-    }
-    topKDimensions.get(dimension).add(value);
-  }
-
-  public void addAllValues(String dimension, Set<String> values) {
-    if (topKDimensions.get(dimension) == null) {
-      topKDimensions.put(dimension, new HashSet<String>());
-    }
-    topKDimensions.get(dimension).addAll(values);
-  }
-
-  /**
-   * Add all top k values for all dimensions from a TopKDimensionValues object
-   * @param valuesFile
-   */
-  public void addMap(TopKDimensionValues valuesFile) {
-    Map<String, Set<String>> values = valuesFile.getTopKDimensions();
-    for (Entry<String, Set<String>> entry : values.entrySet()) {
-      if (topKDimensions.get(entry.getKey()) == null) {
-        topKDimensions.put(entry.getKey(), new HashSet<String>());
-      }
-      topKDimensions.get(entry.getKey()).addAll(entry.getValue());
-    }
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConfig.java
deleted file mode 100644
index 64a2c258bd..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConfig.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
-import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * This class contains the config needed by TopKPhase
- * and the methods to obtain the config from the ThirdEyeConfig
- */
-public class TopKPhaseConfig {
-  private List<String> dimensionNames;
-  private List<DimensionType> dimensionTypes;
-  private List<String> metricNames;
-  private List<MetricType> metricTypes;
-  private Map<String, Double> metricThresholds;
-  private Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec;
-  private Map<String, List<String>> whitelist;
-
-  private static final double DEFAULT_METRIC_THRESHOLD = 0.01;
-
-  public TopKPhaseConfig() {
-
-  }
-
-  /**
-   * @param dimensionNames
-   * @param dimensionTypes
-   * @param metricNames
-   * @param metricTypes
-   * @param metricThresholds
-   * @param whitelist
-   */
-  public TopKPhaseConfig(List<String> dimensionNames, List<DimensionType> dimensionTypes,
-      List<String> metricNames, List<MetricType> metricTypes,
-      Map<String, Double> metricThresholds, Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec,
-      Map<String, List<String>> whitelist) {
-    super();
-    this.dimensionNames = dimensionNames;
-    this.dimensionTypes = dimensionTypes;
-    this.metricNames = metricNames;
-    this.metricTypes = metricTypes;
-    this.metricThresholds = metricThresholds;
-    this.topKDimensionToMetricsSpec = topKDimensionToMetricsSpec;
-    this.whitelist = whitelist;
-  }
-
-  public List<String> getDimensionNames() {
-    return dimensionNames;
-  }
-
-  public List<DimensionType> getDimensionTypes() {
-    return dimensionTypes;
-  }
-
-  public List<String> getMetricNames() {
-    return metricNames;
-  }
-
-  public List<MetricType> getMetricTypes() {
-    return metricTypes;
-  }
-
-  public Map<String, Double> getMetricThresholds() {
-    return metricThresholds;
-  }
-
-  public Map<String, TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpec() {
-    return topKDimensionToMetricsSpec;
-  }
-
-  public Map<String, List<String>> getWhitelist() {
-    return whitelist;
-  }
-
-  /**
-   * This method generates necessary top k config for TopKPhase job from
-   * ThirdEye config
-   * @param config
-   * @return
-   */
-  public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
-
-    //metrics
-    List<String> metricNames = new ArrayList<>(config.getMetrics().size());
-    List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
-    for (MetricSpec spec : config.getMetrics()) {
-      metricNames.add(spec.getName());
-      metricTypes.add(spec.getType());
-    }
-
-    // dimensions
-    List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
-    List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
-    for (DimensionSpec spec : config.getDimensions()) {
-      dimensionNames.add(spec.getName());
-      dimensionTypes.add(spec.getDimensionType());
-    }
-
-    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
-    Map<String, Double> metricThresholds = new HashMap<>();
-    Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
-    Map<String, List<String>> whitelist = new HashMap<>();
-
-    // topk
-    if (topKWhitelist != null) {
-      // metric thresholds
-      if (topKWhitelist.getThreshold() != null) {
-        metricThresholds = topKWhitelist.getThreshold();
-      }
-      for (String metric : metricNames) {
-        if (metricThresholds.get(metric) == null) {
-          metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
-        }
-      }
-
-      // topk
-      if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
-        for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
-          topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
-        }
-      }
-
-      // whitelist
-      if (topKWhitelist.getWhitelist() != null) {
-        whitelist.putAll(topKWhitelist.getWhitelist());
-      }
-    }
-
-    return new TopKPhaseConfig(dimensionNames, dimensionTypes, metricNames, metricTypes, metricThresholds,
-        topKDimensionToMetricsSpec, whitelist);
-  }
-
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConstants.java
deleted file mode 100644
index 9c94ba0cf8..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseConstants.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-/**
- * This class contains the properties to be set for topk phase
- */
-public enum TopKPhaseConstants {
-  TOPK_PHASE_INPUT_PATH("topk.phase.input.path"),
-  TOPK_PHASE_OUTPUT_PATH("topk.phase.output.path"),
-  TOPK_PHASE_THIRDEYE_CONFIG("topk.rollup.phase.thirdeye.config");
-
-  String name;
-
-  TopKPhaseConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseJob.java
deleted file mode 100644
index ba88d31396..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseJob.java
+++ /dev/null
@@ -1,463 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import static com.linkedin.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_INPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_OUTPUT_PATH;
-import static com.linkedin.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_THIRDEYE_CONFIG;
-
-import java.io.DataOutput;
-import java.io.File;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.commons.collections.MapUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.collect.MinMaxPriorityQueue;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAggregateMetricUtils;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-/**
- * This phase reads avro input, and produces a file with top k values for dimensions
- *
- * Map:
- * Map phase reads avro records, and for each record emits
- * Key=(Dimension name, Dimension Value) Value=(Metrics)
- * For each record, map also emits a
- * Key=(ALL, ALL) Value=(Metrics)
- * This is used for computing the metric sums in the reduce phase
- *
- * Combine:
- * Combine phase receives Key=(DimensionName, DimensionValue)
- * from each map, and aggregates the metric values. This phase
- * helps in reducing the traffic sent to reducer
- *
- * Reduce:
- * We strictly use just 1 reducer.
- * Reduce phase receives Key=(DimensionName, DimensionValue)
- * and aggregates the metric values
- * The very first key received is (ALL, ALL), which helps us compute the total metric sum
- * These metric sums are used to check metric thresholds of other
- * (dimensionName, dimensionValue) pairs. If none of the metric
- * thresholds pass, the pair is discarded.
- * In the cleanup, top k dimension values are picked for each dimension
- * based on the metric value
- * The top k dimension values for each dimension are written to a file
- *
- */
-public class TopKPhaseJob extends Configured {
-  private static final Logger LOGGER = LoggerFactory.getLogger(TopKPhaseJob.class);
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-  private static final String TOPK_ALL_DIMENSION_NAME = "0";
-  private static final String TOPK_ALL_DIMENSION_VALUE = "0";
-
-  private String name;
-  private Properties props;
-
-  /**
-   * @param name
-   * @param props
-   */
-  public TopKPhaseJob(String name, Properties props) {
-    super(new Configuration());
-    this.name = name;
-    this.props = props;
-  }
-
-  public static class TopKPhaseMapper
-      extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
-
-    private TopKPhaseConfig config;
-    ThirdEyeConfig thirdeyeConfig;
-    private List<String> dimensionNames;
-    private List<DimensionType> dimensionTypes;
-    private List<String> metricNames;
-    private List<MetricType> metricTypes;
-    private int numMetrics;
-    BytesWritable keyWritable;
-    BytesWritable valWritable;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-      LOGGER.info("TopKPhaseJob.TopKPhaseMapper.setup()");
-      Configuration configuration = context.getConfiguration();
-      try {
-        thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-        config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-        dimensionNames = config.getDimensionNames();
-        dimensionTypes = config.getDimensionTypes();
-        metricNames = config.getMetricNames();
-        metricTypes = config.getMetricTypes();
-        numMetrics = metricNames.size();
-        valWritable = new BytesWritable();
-        keyWritable = new BytesWritable();
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-    }
-
-
-    @Override
-    public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
-        throws IOException, InterruptedException {
-
-      // input record
-      GenericRecord inputRecord = key.datum();
-
-      // read metrics
-      Number[] metricValues = new Number[numMetrics];
-      for (int i = 0; i < numMetrics; i++) {
-        String metricName = metricNames.get(i);
-        Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName);
-        metricValues[i] = metricValue;
-      }
-      TopKPhaseMapOutputValue valWrapper = new TopKPhaseMapOutputValue(metricValues, metricTypes);
-      byte[] valBytes = valWrapper.toBytes();
-      valWritable.set(valBytes, 0, valBytes.length);
-
-      // read dimensions
-      for (int i = 0; i < dimensionNames.size(); i++) {
-        String dimensionName = dimensionNames.get(i);
-        DimensionType dimensionType = dimensionTypes.get(i);
-        Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
-
-        TopKPhaseMapOutputKey keyWrapper = new TopKPhaseMapOutputKey(dimensionName, dimensionValue, dimensionType);
-        byte[] keyBytes = keyWrapper.toBytes();
-        keyWritable.set(keyBytes, 0, keyBytes.length);
-        context.write(keyWritable, valWritable);
-      }
-      TopKPhaseMapOutputKey allKeyWrapper = new TopKPhaseMapOutputKey(TOPK_ALL_DIMENSION_NAME, TOPK_ALL_DIMENSION_VALUE, DimensionType.STRING);
-      byte[] allKeyBytes = allKeyWrapper.toBytes();
-      keyWritable.set(allKeyBytes, 0, allKeyBytes.length);
-      context.write(keyWritable, valWritable);
-    }
-
-    @Override
-    public void cleanup(Context context) throws IOException, InterruptedException {
-
-    }
-  }
-
-  public static class TopKPhaseCombiner
-    extends Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {
-
-    private TopKPhaseConfig config;
-    ThirdEyeConfig thirdeyeConfig;
-    private List<MetricType> metricTypes;
-    private int numMetrics;
-    BytesWritable keyWritable;
-    BytesWritable valWritable;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-      LOGGER.info("TopKPhaseJob.TopKPhaseCombiner.setup()");
-      Configuration configuration = context.getConfiguration();
-      try {
-        thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-        config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-        metricTypes = config.getMetricTypes();
-        numMetrics = metricTypes.size();
-        valWritable = new BytesWritable();
-        keyWritable = new BytesWritable();
-
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-    }
-
-    @Override
-    public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
-    throws IOException, InterruptedException {
-
-      Number[] aggMetricValues = new Number[numMetrics];
-      Arrays.fill(aggMetricValues, 0);
-
-      for (BytesWritable value : values) {
-        TopKPhaseMapOutputValue valWrapper = TopKPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes);
-        Number[] metricValues = valWrapper.getMetricValues();
-        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
-      }
-
-      TopKPhaseMapOutputValue valWrapper = new TopKPhaseMapOutputValue(aggMetricValues, metricTypes);
-      byte[] valBytes = valWrapper.toBytes();
-      valWritable.set(valBytes, 0, valBytes.length);
-
-      context.write(key, valWritable);
-    }
-  }
-
-  public static class TopKPhaseReducer
-      extends Reducer<BytesWritable, BytesWritable, NullWritable, NullWritable> {
-
-    private FileSystem fileSystem;
-    private Configuration configuration;
-
-    private ThirdEyeConfig thirdeyeConfig;
-    private TopKPhaseConfig config;
-    private List<String> dimensionNames;
-    private List<String> metricNames;
-    private List<MetricType> metricTypes;
-    private Map<String, Integer> metricToIndexMapping;
-    private int numMetrics;
-    BytesWritable keyWritable;
-    BytesWritable valWritable;
-    Number[] metricSums;
-    private Map<String, Map<Object, Number[]>> dimensionNameToValuesMap;
-    private TopKDimensionValues topkDimensionValues;
-    private Map<String, Double> metricThresholds;
-    private Map<String, Integer> thresholdPassCount;
-    private Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecMap;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-
-      LOGGER.info("TopKPhaseJob.TopKPhaseReducer.setup()");
-
-      configuration = context.getConfiguration();
-      fileSystem = FileSystem.get(configuration);
-      try {
-        thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-        config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-        LOGGER.info("Metric Thresholds form config {}", config.getMetricThresholds());
-        metricThresholds = config.getMetricThresholds();
-        topKDimensionToMetricsSpecMap = config.getTopKDimensionToMetricsSpec();
-        dimensionNames = config.getDimensionNames();
-        metricNames = config.getMetricNames();
-        metricTypes = config.getMetricTypes();
-
-        numMetrics = metricNames.size();
-
-        metricToIndexMapping = new HashMap<>();
-        for (int i = 0; i < numMetrics; i ++) {
-          metricToIndexMapping.put(metricNames.get(i), i);
-        }
-
-        dimensionNameToValuesMap = new HashMap<>();
-        thresholdPassCount = new HashMap<>();
-        for (String dimension : dimensionNames) {
-          dimensionNameToValuesMap.put(dimension, new HashMap<Object, Number[]>());
-          thresholdPassCount.put(dimension, 0);
-        }
-        topkDimensionValues = new TopKDimensionValues();
-
-        keyWritable = new BytesWritable();
-        valWritable = new BytesWritable();
-
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-    }
-
-    @Override
-    public void reduce(BytesWritable key, Iterable<BytesWritable> values,
-        Context context) throws IOException, InterruptedException {
-
-      TopKPhaseMapOutputKey keyWrapper = TopKPhaseMapOutputKey.fromBytes(key.getBytes());
-      String dimensionName = keyWrapper.getDimensionName();
-      Object dimensionValue = keyWrapper.getDimensionValue();
-
-      // Get aggregate metric values for dimension name value pair
-      Number[] aggMetricValues = new Number[numMetrics];
-      Arrays.fill(aggMetricValues, 0);
-      for (BytesWritable value : values) {
-        TopKPhaseMapOutputValue valWrapper = TopKPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes);
-        Number[] metricValues = valWrapper.getMetricValues();
-        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
-      }
-
-      // Metric sums case
-      if (dimensionName.equals(TOPK_ALL_DIMENSION_NAME) && dimensionValue.equals(TOPK_ALL_DIMENSION_VALUE)) {
-        LOGGER.info("Setting metric sums");
-        metricSums = new Number[numMetrics];
-        metricSums = Arrays.copyOf(aggMetricValues, numMetrics);
-        return;
-      }
-
-      // Check metric percentage threshold
-      if (MapUtils.isNotEmpty(metricThresholds)) {
-        boolean isPassThreshold = false;
-        for (int i = 0; i < numMetrics; i++) {
-          String metric = metricNames.get(i);
-          double metricValue = aggMetricValues[i].doubleValue();
-          double metricSum = metricSums[i].doubleValue();
-          double metricThresholdPercentage = metricThresholds.get(metric);
-          if (metricValue >= (metricSum * metricThresholdPercentage / 100)) {
-            isPassThreshold = true;
-            thresholdPassCount.put(dimensionName, thresholdPassCount.get(dimensionName) + 1);
-            break;
-          }
-        }
-        if (!isPassThreshold) {
-          return;
-        }
-        dimensionNameToValuesMap.get(dimensionName).put(dimensionValue, aggMetricValues);
-      }
-    }
-
-    @Override
-    protected void cleanup(Context context) throws IOException, InterruptedException {
-
-      for (String dimension : dimensionNames) {
-
-        LOGGER.info("{} records passed metric threshold for dimension {}", thresholdPassCount.get(dimension), dimension);
-
-        // Get top k
-        TopKDimensionToMetricsSpec topkSpec = topKDimensionToMetricsSpecMap.get(dimension);
-        if (topkSpec != null && topkSpec.getDimensionName() != null && topkSpec.getTopk() != null) {
-
-          // Get top k for each metric specified
-          Map<String, Integer> topkMetricsMap = topkSpec.getTopk();
-          for (Entry<String, Integer> topKEntry : topkMetricsMap.entrySet()) {
-
-            String metric = topKEntry.getKey();
-            int k = topKEntry.getValue();
-            MinMaxPriorityQueue<DimensionValueMetricPair> topKQueue = MinMaxPriorityQueue.maximumSize(k).create();
-
-            Map<Object, Number[]> dimensionToMetricsMap = dimensionNameToValuesMap.get(dimension);
-            for (Entry<Object, Number[]> entry : dimensionToMetricsMap.entrySet()) {
-              topKQueue.add(new DimensionValueMetricPair(entry.getKey(), entry.getValue()[metricToIndexMapping.get(metric)]));
-            }
-            LOGGER.info("Picking Top {} values for {} based on Metric {} : {}", k, dimension, metric, topKQueue);
-            for (DimensionValueMetricPair pair : topKQueue) {
-              topkDimensionValues.addValue(dimension, String.valueOf(pair.getDimensionValue()));
-            }
-          }
-        }
-      }
-
-      if (topkDimensionValues.getTopKDimensions().size() > 0) {
-        String topkValuesPath = configuration.get(TOPK_PHASE_OUTPUT_PATH.toString());
-        LOGGER.info("Writing top k values to {}",topkValuesPath);
-        FSDataOutputStream topKDimensionValuesOutputStream = fileSystem.create(
-            new Path(topkValuesPath + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE));
-        OBJECT_MAPPER.writeValue((DataOutput) topKDimensionValuesOutputStream, topkDimensionValues);
-        topKDimensionValuesOutputStream.close();
-      }
-    }
-  }
-
-  public Job run() throws Exception {
-    Job job = Job.getInstance(getConf());
-    job.setJobName(name);
-    job.setJarByClass(TopKPhaseJob.class);
-
-    Configuration configuration = job.getConfiguration();
-    FileSystem fs = FileSystem.get(configuration);
-
-    // Properties
-    LOGGER.info("Properties {}", props);
-
-     // Input Path
-    String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
-    LOGGER.info("Input path dir: " + inputPathDir);
-    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
-      LOGGER.info("Adding input:" + inputPath);
-      Path input = new Path(inputPath);
-      FileInputFormat.addInputPath(job, input);
-    }
-
-    // Output path
-    Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
-    LOGGER.info("Output path dir: " + outputPath.toString());
-    if (fs.exists(outputPath)) {
-      fs.delete(outputPath, true);
-    }
-    FileOutputFormat.setOutputPath(job, outputPath);
-
-    // Schema
-    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
-    LOGGER.info("Schema : {}", avroSchema.toString(true));
-
-    // ThirdEyeConfig
-    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
-    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
-        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
-    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
-    job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    // Map config
-    job.setMapperClass(TopKPhaseMapper.class);
-    job.setInputFormatClass(AvroKeyInputFormat.class);
-    job.setMapOutputKeyClass(BytesWritable.class);
-    job.setMapOutputValueClass(BytesWritable.class);
-
-    // Combiner
-    job.setCombinerClass(TopKPhaseCombiner.class);
-
-     // Reduce config
-    job.setReducerClass(TopKPhaseReducer.class);
-    job.setOutputKeyClass(NullWritable.class);
-    job.setOutputValueClass(NullWritable.class);
-    job.setNumReduceTasks(1);
-
-    job.waitForCompletion(true);
-
-    return job;
-  }
-
-
-  private String getAndSetConfiguration(Configuration configuration,
-      TopKPhaseConstants constant) {
-    String value = getAndCheck(constant.toString());
-    configuration.set(constant.toString(), value);
-    return value;
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " is a required property");
-    }
-    return propValue;
-  }
-
-}
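
The reducer above relies on Guava's MinMaxPriorityQueue for its top-k selection: a queue capped at k evicts its greatest element whenever it overflows, so only the k smallest elements under the queue's comparator survive. DimensionValueMetricPair, defined elsewhere in this module, is expected to order pairs so that the pair with the lowest metric value is the "greatest" and gets evicted first. A self-contained sketch of the same pattern, using plain integers and made-up values:

    import com.google.common.collect.MinMaxPriorityQueue;

    public class TopKQueueSketch {
      public static void main(String[] args) {
        int k = 3;
        // natural ordering; capped at k elements, greatest element evicted on overflow
        MinMaxPriorityQueue<Integer> queue = MinMaxPriorityQueue.maximumSize(k).create();
        for (int metricValue : new int[] {5, 42, 7, 13, 99, 1}) {
          // negate so that larger metric values sort as "smaller" and are retained
          queue.add(-metricValue);
        }
        for (Integer v : queue) {
          System.out.println(-v); // prints 99, 42 and 13, in no particular order
        }
      }
    }
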
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java
deleted file mode 100644
index 491c0c2d77..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-
-/**
- * Wrapper for the key generated by mapper in TopKPhase
- */
-public class TopKPhaseMapOutputKey {
-
-  String dimensionName;
-  Object dimensionValue;
-  DimensionType dimensionType;
-
-  public TopKPhaseMapOutputKey(String dimensionName, Object dimensionValue, DimensionType dimensionType) {
-    this.dimensionName = dimensionName;
-    this.dimensionValue = dimensionValue;
-    this.dimensionType = dimensionType;
-  }
-
-  public String getDimensionName() {
-    return dimensionName;
-  }
-
-  public Object getDimensionValue() {
-    return dimensionValue;
-  }
-
-  public DimensionType getDimensionType() {
-    return dimensionType;
-  }
-
-  /**
-   * Converts a TopKPhaseMapOutputKey to a bytes buffer
-   * @return
-   * @throws IOException
-   */
-  public byte[] toBytes() throws IOException {
-
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    DataOutputStream dos = new DataOutputStream(baos);
-    byte[] bytes;
-    // dimension name
-    bytes = dimensionName.getBytes();
-    dos.writeInt(bytes.length);
-    dos.write(bytes);
-
-    // dimension type
-    bytes = dimensionType.toString().getBytes();
-    dos.writeInt(bytes.length);
-    dos.write(bytes);
-
-    // dimension value
-    DimensionType.writeDimensionValueToOutputStream(dos, dimensionValue, dimensionType);
-    baos.close();
-    dos.close();
-    return baos.toByteArray();
-  }
-
-  /**
-   * Constructs a TopKPhaseMapOutputKey from a bytes buffer
-   * @param buffer
-   * @return
-   * @throws IOException
-   */
-  public static TopKPhaseMapOutputKey fromBytes(byte[] buffer) throws IOException {
-    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
-    int length;
-    byte[] bytes;
-
-    // dimension name
-    length = dis.readInt();
-    bytes = new byte[length];
-    dis.read(bytes);
-    String dimensionName = new String(bytes);
-
-    // dimension type
-    length = dis.readInt();
-    bytes = new byte[length];
-    dis.read(bytes);
-    String dimensionTypeString = new String(bytes);
-    DimensionType dimensionType = DimensionType.valueOf(dimensionTypeString);
-
-    // dimension value
-    Object dimensionValue = DimensionType.readDimensionValueFromDataInputStream(dis, dimensionType);
-
-    TopKPhaseMapOutputKey wrapper;
-    wrapper = new TopKPhaseMapOutputKey(dimensionName, dimensionValue, dimensionType);
-    return wrapper;
-  }
-
-}
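
For readers following the byte layout above (length-prefixed dimension name, length-prefixed dimension type string, then the dimension value written via DimensionType), a minimal round-trip sketch, assuming the deleted classes are still available under their old com.linkedin.thirdeye package; the dimension name and value are made up:

    import com.linkedin.thirdeye.hadoop.config.DimensionType;
    import com.linkedin.thirdeye.hadoop.topk.TopKPhaseMapOutputKey;

    public class TopKKeyRoundTrip {
      public static void main(String[] args) throws Exception {
        TopKPhaseMapOutputKey key =
            new TopKPhaseMapOutputKey("country", "US", DimensionType.STRING);
        byte[] bytes = key.toBytes(); // [len][name][len][type][value]
        TopKPhaseMapOutputKey decoded = TopKPhaseMapOutputKey.fromBytes(bytes);
        System.out.println(decoded.getDimensionName() + "=" + decoded.getDimensionValue()); // country=US
      }
    }
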
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java
deleted file mode 100644
index 8e40316186..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-
-/**
- * Wrapper for value generated by mapper in TopKPhase
- */
-public class TopKPhaseMapOutputValue {
-
-  Number[] metricValues;
-  List<MetricType> metricTypes;
-
-  public TopKPhaseMapOutputValue(Number[] metricValues, List<MetricType> metricTypes) {
-    this.metricValues = metricValues;
-    this.metricTypes = metricTypes;
-  }
-
-  public Number[] getMetricValues() {
-    return metricValues;
-  }
-
-  /**
-   * Converts TopkPhaseMapOutputValue to a buffer of bytes
-   * @return
-   * @throws IOException
-   */
-  public byte[] toBytes() throws IOException {
-
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    DataOutputStream dos = new DataOutputStream(baos);
-
-    // metric values
-    dos.writeInt(metricValues.length);
-    for (int i = 0; i < metricValues.length; i++) {
-      Number number = metricValues[i];
-      MetricType metricType = metricTypes.get(i);
-      MetricType.writeMetricValueToDataOutputStream(dos, number, metricType);
-    }
-
-    baos.close();
-    dos.close();
-    return baos.toByteArray();
-  }
-
-  /**
-   * Constructs TopKPhaseMapOutputValue from bytes buffer
-   * @param buffer
-   * @param metricTypes
-   * @return
-   * @throws IOException
-   */
-  public static TopKPhaseMapOutputValue fromBytes(byte[] buffer, List<MetricType> metricTypes) throws IOException {
-    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
-    int length;
-
-    // metric values
-    length = dis.readInt();
-    Number[] metricValues = new Number[length];
-
-    for (int i = 0 ; i < length; i++) {
-      MetricType metricType = metricTypes.get(i);
-      Number metricValue = MetricType.readMetricValueFromDataInputStream(dis, metricType);
-      metricValues[i] = metricValue;
-    }
-
-    TopKPhaseMapOutputValue wrapper;
-    wrapper = new TopKPhaseMapOutputValue(metricValues, metricTypes);
-    return wrapper;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java
deleted file mode 100644
index d9f4f97706..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import org.apache.hadoop.mapreduce.Job;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultTransformConfigUDF implements TransformConfigUDF {
-  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultTransformConfigUDF.class);
-
-  @Override
-  public void setTransformConfig(Job job) {
-
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformUDF.java
deleted file mode 100644
index c4c6f591e8..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DefaultTransformUDF.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultTransformUDF implements TransformUDF {
-  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultTransformUDF.class);
-
-  private Schema outputSchema;
-
-  public DefaultTransformUDF() {
-
-  }
-
-  @Override
-  public void init(Schema outputSchema) {
-    this.outputSchema = outputSchema;
-  }
-
-  @Override
-  public GenericRecord transformRecord(String sourceName, GenericRecord record) {
-    // Default implementation returns input record as is
-    return record;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java
deleted file mode 100644
index 0966a2f210..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.avro.Schema;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyRecordReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.codehaus.jackson.JsonParseException;
-import org.codehaus.jackson.map.JsonMappingException;
-import org.codehaus.jackson.map.ObjectMapper;
-
-import org.codehaus.jackson.type.TypeReference;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DelegatingAvroKeyInputFormat<T> extends AvroKeyInputFormat<T> {
-  private static final Logger LOGGER = LoggerFactory.getLogger(DelegatingAvroKeyInputFormat.class);
-  private static TypeReference MAP_STRING_STRING_TYPE = new TypeReference<Map<String, String>>() {
-  };
-
-  public org.apache.hadoop.mapreduce.RecordReader<org.apache.avro.mapred.AvroKey<T>, NullWritable> createRecordReader(
-      InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
-    LOGGER.info("DelegatingAvroKeyInputFormat.createRecordReader()  for split:{}", split);
-    FileSplit fileSplit = (FileSplit) split;
-    Configuration configuration = context.getConfiguration();
-    String sourceName = getSourceNameFromPath(fileSplit, configuration);
-    LOGGER.info("Source Name for path {} : {}", fileSplit.getPath(), sourceName);
-    Map<String, String> schemaJSONMapping = new ObjectMapper()
-        .readValue(configuration.get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
-
-    LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
-
-    String sourceSchemaJSON = schemaJSONMapping.get(sourceName);
-
-    Schema schema = new Schema.Parser().parse(sourceSchemaJSON);
-    return new AvroKeyRecordReader<T>(schema);
-  }
-
-  public static String getSourceNameFromPath(FileSplit fileSplit, Configuration configuration)
-      throws IOException, JsonParseException, JsonMappingException {
-    String content = configuration.get("schema.path.mapping");
-    Map<String, String> schemaPathMapping =
-        new ObjectMapper().readValue(content, MAP_STRING_STRING_TYPE);
-    LOGGER.info("Schema Path Mapping: {}", schemaPathMapping);
-
-    String sourceName = null;
-    for (String path : schemaPathMapping.keySet()) {
-      if (fileSplit.getPath().toString().indexOf(path) > -1) {
-        sourceName = schemaPathMapping.get(path);
-        break;
-      }
-    }
-    return sourceName;
-  };
-}
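
The two configuration entries read above are produced by TransformPhaseJob further down in this diff: "schema.path.mapping" is a JSON map from input path to source name (resolved by substring match against the split's path), and "schema.json.mapping" maps each source name to its Avro schema JSON. A small sketch of the lookup, with hypothetical paths and source names:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import com.linkedin.thirdeye.hadoop.transform.DelegatingAvroKeyInputFormat;

    public class SourceNameLookupSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("schema.path.mapping",
            "{\"/data/pageViews\":\"pageViews\",\"/data/adClicks\":\"adClicks\"}");

        FileSplit split = new FileSplit(
            new Path("/data/pageViews/2018-12-01/part-0.avro"), 0L, 0L, new String[0]);
        // substring match against the split's path resolves the source name
        String sourceName = DelegatingAvroKeyInputFormat.getSourceNameFromPath(split, conf);
        System.out.println(sourceName); // pageViews
      }
    }
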
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformConfigUDF.java
deleted file mode 100644
index ee66870a11..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformConfigUDF.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import org.apache.hadoop.mapreduce.Job;
-
-/**
- * Simple interface to set custom configuration on the transform job
- */
-public interface TransformConfigUDF {
-
-  /**
-   * Sets custom job configuration (e.g. distributed cache entries) before the transform job is submitted
-   * @param job
-   */
-  void setTransformConfig(Job job);
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformPhaseJob.java
deleted file mode 100644
index a4ee3bd6a4..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformPhaseJob.java
+++ /dev/null
@@ -1,289 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import static com.linkedin.thirdeye.hadoop.transform.TransformPhaseJobConstants.*;
-
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.StringWriter;
-import java.lang.reflect.Constructor;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapred.AvroValue;
-import org.apache.avro.mapreduce.AvroJob;
-import org.apache.avro.mapreduce.AvroKeyOutputFormat;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-/**
- * Transform job to transform input files from one schema to another
- * Required properties:
- * transform.source.names=<comma separated source names>
- * <source name>.transform.input.schema=<path to input schema on hdfs, one per source>
- * <source name>.transform.input.path=<path to input data files on hdfs, one per source>
- * transform.output.schema=<path to output schema on hdfs>
- * transform.output.path=<output data path on hdfs>
- * transform.udf.class=<UDF class to perform transformation>
- * transform.config.udf.class=<UDF class to set custom job configuration>
- * transform.num.reducers=<number of reducers>
- */
-public class TransformPhaseJob extends Configured {
-  private static final Logger LOGGER = LoggerFactory.getLogger(TransformPhaseJob.class);
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-  private String name;
-  private Properties props;
-
-  public TransformPhaseJob(String name, Properties props) {
-    super(new Configuration());
-    this.name = name;
-    this.props = props;
-  }
-
-  public static class GenericTransformMapper
-      extends Mapper<AvroKey<GenericRecord>, NullWritable, IntWritable, AvroValue<GenericRecord>> {
-
-    TransformUDF transformUDF;
-    int numReducers;
-    int reducerKey;
-    String sourceName;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-
-      LOGGER.info("GenericAvroTransformJob.GenericTransformMapper.setup()");
-
-      Configuration configuration = context.getConfiguration();
-      FileSystem fs = FileSystem.get(configuration);
-
-      FileSplit fileSplit = (FileSplit) context.getInputSplit();
-      LOGGER.info("split name:" + fileSplit.toString());
-      sourceName = DelegatingAvroKeyInputFormat.getSourceNameFromPath(fileSplit, configuration);
-      LOGGER.info("Input: {} belongs to Source:{}", fileSplit, sourceName);
-
-      String numTransformReducers = configuration.get(TRANSFORM_NUM_REDUCERS.toString());
-      numReducers = Integer.parseInt(numTransformReducers);
-      reducerKey = 1;
-      try {
-
-        String transformUDFClass = configuration.get(TRANSFORM_UDF.toString());
-        LOGGER.info("Initializing TransformUDFClass:{} with params:{}", transformUDFClass);
-        Constructor<?> constructor = Class.forName(transformUDFClass).getConstructor();
-        transformUDF = (TransformUDF) constructor.newInstance();
-
-        String outputSchemaPath = configuration.get(TRANSFORM_OUTPUT_SCHEMA.toString());
-        Schema.Parser parser = new Schema.Parser();
-        Schema outputSchema = parser.parse(fs.open(new Path(outputSchemaPath)));
-
-        transformUDF.init(outputSchema);
-      } catch (Exception e) {
-        throw new IOException(e);
-      }
-
-    }
-
-    @Override
-    public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
-        throws IOException, InterruptedException {
-      GenericRecord record = recordWrapper.datum();
-      GenericRecord outputRecord = transformUDF.transformRecord(sourceName, record);
-
-      if (outputRecord != null) {
-
-        IntWritable key = new IntWritable(reducerKey);
-        reducerKey = (reducerKey == numReducers) ? (1) : (reducerKey + 1);
-        context.write(key, new AvroValue<GenericRecord>(outputRecord));
-      }
-
-    }
-
-    @Override
-    public void cleanup(Context context) throws IOException, InterruptedException {
-
-    }
-
-  }
-
-  public static class GenericTransformReducer
-      extends Reducer<IntWritable, AvroValue<GenericRecord>, AvroKey<GenericRecord>, NullWritable> {
-    @Override
-    public void reduce(IntWritable key, Iterable<AvroValue<GenericRecord>> values, Context context)
-        throws IOException, InterruptedException {
-      for (AvroValue<GenericRecord> value : values) {
-        GenericRecord record = value.datum();
-        context.write(new AvroKey<GenericRecord>(record), NullWritable.get());
-      }
-    }
-  }
-
-  public Job run() throws Exception {
-
-    // Set job config
-    Job job = Job.getInstance(getConf());
-    Configuration configuration = job.getConfiguration();
-    job.setJobName(name);
-    job.setJarByClass(TransformPhaseJob.class);
-
-    // Set custom config like adding distributed caches
-    String transformConfigUDFClass = getAndSetConfiguration(configuration, TRANSFORM_CONFIG_UDF);
-    LOGGER.info("Initializing TransformConfigUDFClass:{} with params:{}", transformConfigUDFClass);
-    Constructor<?> constructor = Class.forName(transformConfigUDFClass).getConstructor();
-    TransformConfigUDF transformConfigUDF = (TransformConfigUDF) constructor.newInstance();
-    transformConfigUDF.setTransformConfig(job);
-
-    FileSystem fs = FileSystem.get(configuration);
-
-    // Set outputSchema, output path
-    String outputSchemaPath = getAndSetConfiguration(configuration, TRANSFORM_OUTPUT_SCHEMA);
-    Schema.Parser parser = new Schema.Parser();
-    Schema outputSchema = parser.parse(fs.open(new Path(outputSchemaPath)));
-    LOGGER.info("{}", outputSchema);
-
-    String outputPathDir = getAndSetConfiguration(configuration, TRANSFORM_OUTPUT_PATH);
-    Path outputPath = new Path(outputPathDir);
-    if (fs.exists(outputPath)) {
-      fs.delete(outputPath, true);
-    }
-    FileOutputFormat.setOutputPath(job, new Path(outputPathDir));
-
-    // Set input schema, input path for every source
-    String sources = getAndSetConfiguration(configuration, TRANSFORM_SOURCE_NAMES);
-    List<String> sourceNames = Arrays.asList(sources.split(","));
-    Map<String, String> schemaMap = new HashMap<String, String>();
-    Map<String, String> schemaPathMapping = new HashMap<String, String>();
-
-    for (String sourceName : sourceNames) {
-
-      // load schema for each source
-      LOGGER.info("Loading Schema for {}", sourceName);
-      FSDataInputStream schemaStream =
-          fs.open(new Path(getAndCheck(sourceName + "." + TRANSFORM_INPUT_SCHEMA.toString())));
-      Schema schema = new Schema.Parser().parse(schemaStream);
-      schemaMap.put(sourceName, schema.toString());
-      LOGGER.info("Schema for {}:  \n{}", sourceName, schema);
-
-      // configure input data for each source
-      String inputPathDir = getAndCheck(sourceName + "." + TRANSFORM_INPUT_PATH.toString());
-      LOGGER.info("Input path dir for " + sourceName + ": " + inputPathDir);
-      for (String inputPath : inputPathDir.split(",")) {
-        Path input = new Path(inputPath);
-        FileStatus[] listFiles = fs.listStatus(input);
-        boolean isNested = false;
-        for (FileStatus fileStatus : listFiles) {
-          if (fileStatus.isDirectory()) {
-            isNested = true;
-            Path path = fileStatus.getPath();
-            LOGGER.info("Adding input:" + path);
-            FileInputFormat.addInputPath(job, path);
-            schemaPathMapping.put(path.toString(), sourceName);
-          }
-        }
-        if (!isNested) {
-          LOGGER.info("Adding input:" + inputPath);
-          FileInputFormat.addInputPath(job, input);
-          schemaPathMapping.put(input.toString(), sourceName);
-        }
-      }
-    }
-    StringWriter temp = new StringWriter();
-    OBJECT_MAPPER.writeValue(temp, schemaPathMapping);
-    job.getConfiguration().set("schema.path.mapping", temp.toString());
-
-    temp = new StringWriter();
-    OBJECT_MAPPER.writeValue(temp, schemaMap);
-    job.getConfiguration().set("schema.json.mapping", temp.toString());
-
-    // set transform UDF class
-    getAndSetConfiguration(configuration, TRANSFORM_UDF);
-
-    // set reducers
-    String numReducers = getAndSetConfiguration(configuration, TRANSFORM_NUM_REDUCERS);
-    if (numReducers != null) {
-      job.setNumReduceTasks(Integer.parseInt(numReducers));
-    } else {
-      job.setNumReduceTasks(10);
-    }
-    LOGGER.info("Setting number of reducers : " + job.getNumReduceTasks());
-
-    // Map config
-    job.setMapperClass(GenericTransformMapper.class);
-    // AvroJob.setInputKeySchema(job, inputSchema);
-    job.setInputFormatClass(DelegatingAvroKeyInputFormat.class);
-    job.setMapOutputKeyClass(IntWritable.class);
-    job.setMapOutputValueClass(AvroValue.class);
-    AvroJob.setMapOutputValueSchema(job, outputSchema);
-
-    // Reducer config
-    job.setReducerClass(GenericTransformReducer.class);
-    job.setOutputKeyClass(AvroKey.class);
-    job.setOutputValueClass(NullWritable.class);
-    AvroJob.setOutputKeySchema(job, outputSchema);
-    job.setOutputFormatClass(AvroKeyOutputFormat.class);
-    job.waitForCompletion(true);
-
-    return job;
-  }
-
-  private String getAndSetConfiguration(Configuration configuration,
-      TransformPhaseJobConstants constant) {
-    String value = getAndCheck(constant.toString());
-    configuration.set(constant.toString(), value);
-    return value;
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " is a required property");
-    }
-    return propValue;
-  }
-
-  public static void main(String[] args) throws Exception {
-    if (args.length != 1) {
-      throw new IllegalArgumentException("usage: config.properties");
-    }
-
-    Properties props = new Properties();
-    props.load(new FileInputStream(args[0]));
-
-    TransformPhaseJob job = new TransformPhaseJob("transform_phase_job", props);
-    job.run();
-  }
-
-}
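
Tying the pieces together, a hedged sketch of a driver that supplies every property the job's run() method checks; the keys come from TransformPhaseJobConstants below, while the HDFS paths and the source name "pageViews" are placeholders:

    import java.util.Properties;
    import com.linkedin.thirdeye.hadoop.transform.TransformPhaseJob;

    public class TransformPhaseJobDriver {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("transform.source.names", "pageViews");
        // per-source keys are prefixed with the source name
        props.setProperty("pageViews.transform.input.schema", "/schemas/pageViews.avsc");
        props.setProperty("pageViews.transform.input.path", "/data/pageViews/2018-12-01");
        props.setProperty("transform.output.schema", "/schemas/pageViews_transformed.avsc");
        props.setProperty("transform.output.path", "/output/pageViews_transformed");
        props.setProperty("transform.udf.class",
            "com.linkedin.thirdeye.hadoop.transform.DefaultTransformUDF");
        props.setProperty("transform.config.udf.class",
            "com.linkedin.thirdeye.hadoop.transform.DefaultTransformConfigUDF");
        props.setProperty("transform.num.reducers", "10");

        new TransformPhaseJob("transform_phase_job", props).run();
      }
    }
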
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformPhaseJobConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformPhaseJobConstants.java
deleted file mode 100644
index e0dce8716f..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformPhaseJobConstants.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-public enum TransformPhaseJobConstants {
-  TRANSFORM_INPUT_SCHEMA("transform.input.schema"),
-  TRANSFORM_INPUT_PATH("transform.input.path"),
-  TRANSFORM_OUTPUT_PATH("transform.output.path"),
-  TRANSFORM_OUTPUT_SCHEMA("transform.output.schema"),
-  TRANSFORM_SOURCE_NAMES("transform.source.names"),
-  TRANSFORM_UDF("transform.udf.class"),
-  TRANSFORM_CONFIG_UDF("transform.config.udf.class"),
-  TRANSFORM_NUM_REDUCERS("transform.num.reducers");
-
-  String name;
-
-  TransformPhaseJobConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformUDF.java
deleted file mode 100644
index c7e8a22aa7..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/transform/TransformUDF.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.transform;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-
-/**
- * Simple interface to transform a Generic Record
- */
-public interface TransformUDF {
-
-  /**
-   * Initializes by providing the output schema.
-   * @param outputSchema
-   */
-  void init(Schema outputSchema);
-
-  /**
-   * Transforms a single input record from the given source into the output schema
-   * @param sourceName
-   * @param record
-   * @return the transformed record, or null to drop the record
-   */
-  GenericRecord transformRecord(String sourceName, GenericRecord record);
-}
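
To illustrate the extension point, a hypothetical TransformUDF implementation that drops records missing a "country" field and copies the remaining fields into the output schema; the field name is made up, and null return values are dropped by GenericTransformMapper as shown earlier in this diff:

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericRecord;
    import com.linkedin.thirdeye.hadoop.transform.TransformUDF;

    public class CountryFilterTransformUDF implements TransformUDF {
      private Schema outputSchema;

      @Override
      public void init(Schema outputSchema) {
        this.outputSchema = outputSchema;
      }

      @Override
      public GenericRecord transformRecord(String sourceName, GenericRecord record) {
        if (record.get("country") == null) {
          return null; // null output drops the record
        }
        GenericRecord output = new GenericData.Record(outputSchema);
        for (Schema.Field field : outputSchema.getFields()) {
          output.put(field.name(), record.get(field.name()));
        }
        return output;
      }
    }
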
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtils.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtils.java
deleted file mode 100644
index d2ac28149b..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtils.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.util;
-
-import java.util.List;
-
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-
-/**
- * Class to aggregate metric values
- */
-public class ThirdeyeAggregateMetricUtils {
-
-  /**
-   * Aggregates an array of metricValues into an aggregate array
-   * @param metricTypes - metric types array
-   * @param aggMetricValues - aggregated metric values
-   * @param metricValues - metric values to add
-   */
-  public static void aggregate(List<MetricType> metricTypes, Number[] aggMetricValues, Number[] metricValues) {
-    int numMetrics = aggMetricValues.length;
-    for (int i = 0; i < numMetrics; i++) {
-      MetricType metricType = metricTypes.get(i);
-      switch (metricType) {
-        case SHORT:
-          aggMetricValues[i] = aggMetricValues[i].shortValue() + metricValues[i].shortValue();
-          break;
-        case INT:
-          aggMetricValues[i] = aggMetricValues[i].intValue() + metricValues[i].intValue();
-          break;
-        case FLOAT:
-          aggMetricValues[i] = aggMetricValues[i].floatValue() + metricValues[i].floatValue();
-          break;
-        case DOUBLE:
-          aggMetricValues[i] = aggMetricValues[i].doubleValue() + metricValues[i].doubleValue();
-          break;
-        case LONG:
-        default:
-          aggMetricValues[i] = aggMetricValues[i].longValue() + metricValues[i].longValue();
-          break;
-      }
-    }
-  }
-
-}
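
A tiny usage sketch of the helper above; the metric types and values are made up:

    import java.util.Arrays;
    import java.util.List;
    import com.linkedin.thirdeye.hadoop.config.MetricType;
    import com.linkedin.thirdeye.hadoop.util.ThirdeyeAggregateMetricUtils;

    public class AggregateSketch {
      public static void main(String[] args) {
        List<MetricType> metricTypes = Arrays.asList(MetricType.LONG, MetricType.DOUBLE);
        Number[] aggregated = new Number[] {0L, 0.0d};
        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggregated, new Number[] {10L, 1.5d});
        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggregated, new Number[] {5L, 2.5d});
        System.out.println(Arrays.toString(aggregated)); // [15, 4.0]
      }
    }
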
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAvroUtils.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAvroUtils.java
deleted file mode 100644
index bee5cd4d80..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAvroUtils.java
+++ /dev/null
@@ -1,267 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.util;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.zip.GZIPInputStream;
-
-import org.apache.avro.Schema;
-import org.apache.avro.Schema.Field;
-import org.apache.avro.SchemaBuilder;
-import org.apache.avro.SchemaBuilder.BaseFieldTypeBuilder;
-import org.apache.avro.SchemaBuilder.FieldAssembler;
-import org.apache.avro.SchemaBuilder.RecordBuilder;
-import org.apache.avro.file.DataFileStream;
-import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.commons.collections.Predicate;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Joiner;
-import com.google.common.collect.Lists;
-import com.linkedin.pinot.common.data.FieldSpec;
-import com.linkedin.pinot.common.data.FieldSpec.DataType;
-import com.linkedin.pinot.core.data.readers.AvroRecordReader;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-
-/**
- * This class contains methods to extract avro schema, and get
- * avro reader from the avro files
- */
-public class ThirdeyeAvroUtils {
-
-  private static Logger LOGGER = LoggerFactory.getLogger(ThirdeyeAvroUtils.class);
-  /**
-   * extracts avro schema from avro file
-   * @param avroFile
-   * @return
-   * @throws FileNotFoundException
-   * @throws IOException
-   */
-  public static Schema extractSchemaFromAvro(Path avroFile) throws IOException {
-    DataFileStream<GenericRecord> dataStreamReader = getAvroReader(avroFile);
-    Schema avroSchema = dataStreamReader.getSchema();
-    dataStreamReader.close();
-    return avroSchema;
-  }
-
-  /**
-   * Constructs an avro schema from a pinot schema
-   * @param schema
-   * @return
-   */
-  public static Schema constructAvroSchemaFromPinotSchema(com.linkedin.pinot.common.data.Schema schema) {
-    Schema avroSchema = null;
-
-    RecordBuilder<Schema> recordBuilder = SchemaBuilder.record("record");
-    FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
-
-    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
-      String fieldName = fieldSpec.getName();
-      DataType dataType = fieldSpec.getDataType();
-      BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(fieldName).type().nullable();
-      switch (dataType) {
-        case BOOLEAN:
-          fieldAssembler = baseFieldTypeBuilder.booleanType().noDefault();
-          break;
-        case DOUBLE:
-          fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
-          break;
-        case FLOAT:
-          fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
-          break;
-        case INT:
-          fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
-          break;
-        case LONG:
-          fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
-          break;
-        case STRING:
-          fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
-          break;
-        default:
-          break;
-      }
-    }
-
-    avroSchema = fieldAssembler.endRecord();
-    LOGGER.info("Avro Schema {}", avroSchema.toString(true));
-
-    return avroSchema;
-  }
-
-  private static DataFileStream<GenericRecord> getAvroReader(Path avroFile) throws IOException {
-    FileSystem fs = FileSystem.get(new Configuration());
-    if(avroFile.getName().endsWith("gz")) {
-      return new DataFileStream<GenericRecord>(new GZIPInputStream(fs.open(avroFile)), new GenericDatumReader<GenericRecord>());
-    } else {
-      return new DataFileStream<GenericRecord>(fs.open(avroFile), new GenericDatumReader<GenericRecord>());
-    }
-  }
-
-  /**
-   * Extracts the datatype of a field from the avro schema, given the name of the field
-   * @param fieldname
-   * @param schema
-   * @return
-   */
-  public static String getDataTypeForField(String fieldname, Schema schema) {
-    Field field = schema.getField(fieldname);
-    if (field == null) {
-      throw new IllegalStateException("Field " + fieldname + " does not exist in schema");
-    }
-
-    final Schema.Type type = field.schema().getType();
-    if (type == Schema.Type.ARRAY) {
-      throw new RuntimeException("TODO: validate correctness after commit b19a0965044d3e3f4f1541cc4cd9ea60b96a4b99");
-    }
-
-    return DataType.valueOf(extractSchemaFromUnionIfNeeded(field.schema()).getType()).toString();
-  }
-
-  /**
-   * Helper removed from AvroRecordReader in b19a0965044d3e3f4f1541cc4cd9ea60b96a4b99
-   *
-   * @param fieldSchema
-   * @return
-   */
-  private static org.apache.avro.Schema extractSchemaFromUnionIfNeeded(org.apache.avro.Schema fieldSchema) {
-    if ((fieldSchema).getType() == Schema.Type.UNION) {
-      fieldSchema = ((org.apache.avro.Schema) CollectionUtils.find(fieldSchema.getTypes(), new Predicate() {
-        @Override
-        public boolean evaluate(Object object) {
-          return ((org.apache.avro.Schema) object).getType() != Schema.Type.NULL;
-        }
-      }));
-    }
-    return fieldSchema;
-  }
-
-  /**
-   * Finds the avro file in the input folder, and returns its avro schema
-   * @param inputPathDir
-   * @return
-   * @throws IOException
-   */
-  public static Schema getSchema(String inputPathDir) throws IOException  {
-    FileSystem fs = FileSystem.get(new Configuration());
-    Schema avroSchema = null;
-    for (String input : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
-      Path inputPath = new Path(input);
-      for (FileStatus fileStatus : fs.listStatus(inputPath)) {
-        if (fileStatus.isFile() && fileStatus.getPath().getName().endsWith(ThirdEyeConstants.AVRO_SUFFIX)) {
-          LOGGER.info("Extracting schema from {}", fileStatus.getPath());
-          avroSchema = extractSchemaFromAvro(fileStatus.getPath());
-          break;
-        }
-      }
-    }
-    return avroSchema;
-  }
-
-  /**
-   * Constructs dimensionTypes property string from the dimension names with the help of the avro schema
-   * @param dimensionNamesProperty
-   * @param avroSchema
-   * @return
-   */
-  public static String getDimensionTypesProperty(String dimensionNamesProperty, Schema avroSchema) {
-    List<String> dimensionTypesFromSchema = new ArrayList<>();
-    if (StringUtils.isNotBlank(dimensionNamesProperty)) {
-      List<String> dimensionNamesFromConfig = Lists.newArrayList(dimensionNamesProperty.split(ThirdEyeConstants.FIELD_SEPARATOR));
-      for (String dimensionName : dimensionNamesFromConfig) {
-        dimensionTypesFromSchema.add(ThirdeyeAvroUtils.getDataTypeForField(dimensionName, avroSchema));
-      }
-    }
-    return Joiner.on(ThirdEyeConstants.FIELD_SEPARATOR).join(dimensionTypesFromSchema);
-  }
-
-
-  /**
-   * Constructs metricTypes property string from the metric names with the help of the avro schema
-   * @param metricNamesProperty
-   * @param avroSchema
-   * @return
-   */
-  public static String getMetricTypesProperty(String metricNamesProperty, String metricTypesProperty, Schema avroSchema) {
-    List<String> metricTypesFromSchema = new ArrayList<>();
-    List<String> metricNamesFromConfig = Lists.newArrayList(metricNamesProperty.split(ThirdEyeConstants.FIELD_SEPARATOR));
-    for (String metricName : metricNamesFromConfig) {
-      if (ThirdEyeConstants.AUTO_METRIC_COUNT.equals(metricName)) {
-        metricTypesFromSchema.add(DataType.LONG.toString());
-        continue;
-      }
-      metricTypesFromSchema.add(ThirdeyeAvroUtils.getDataTypeForField(metricName, avroSchema));
-    }
-    String validatedMetricTypesProperty = Joiner.on(ThirdEyeConstants.FIELD_SEPARATOR).join(metricTypesFromSchema);
-    if (metricTypesProperty != null) {
-      List<String> metricTypesFromConfig = Lists.newArrayList(metricTypesProperty.split(ThirdEyeConstants.FIELD_SEPARATOR));
-      if (metricTypesFromConfig.size() == metricTypesFromSchema.size()) {
-        for (int i = 0; i < metricNamesFromConfig.size(); i++) {
-          String metricName = metricNamesFromConfig.get(i);
-          String metricTypeFromConfig = metricTypesFromConfig.get(i);
-          String metricTypeFromSchema = metricTypesFromSchema.get(i);
-          if (!metricTypeFromConfig.equals(metricTypeFromSchema)) {
-            LOGGER.warn("MetricType {} defined in config for metric {}, does not match dataType {} from avro schema",
-                metricTypeFromConfig, metricName, metricTypeFromSchema);
-          }
-        }
-        validatedMetricTypesProperty = metricTypesProperty;
-      }
-    }
-    return validatedMetricTypesProperty;
-  }
-
-  public static Object getDimensionFromRecord(GenericRecord record, String dimensionName) {
-    Object dimensionValue = record.get(dimensionName);
-    if (dimensionValue == null) {
-      String dataType = getDataTypeForField(dimensionName, record.getSchema());
-      DimensionType dimensionType = DimensionType.valueOf(dataType);
-      dimensionValue = dimensionType.getDefaultNullvalue();
-    }
-    return dimensionValue;
-  }
-
-  public static Number getMetricFromRecord(GenericRecord record, String metricName) {
-    Number metricValue = (Number) record.get(metricName);
-    if (metricValue == null) {
-      metricValue = ThirdEyeConstants.EMPTY_NUMBER;
-    }
-    return metricValue;
-  }
-
-  public static Number getMetricFromRecord(GenericRecord record, String metricName, MetricType metricType) {
-    Number metricValue = (Number) record.get(metricName);
-    if (metricValue == null) {
-      metricValue = metricType.getDefaultNullValue();
-    }
-    return metricValue;
-  }
-
-
-}
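
As a quick illustration of getDataTypeForField above, a sketch that parses a small hand-written Avro schema and resolves two field types; it assumes the old Pinot DataType mapping returns STRING for string fields and LONG for long fields, and the schema itself is made up:

    import org.apache.avro.Schema;
    import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;

    public class FieldTypeLookupSketch {
      public static void main(String[] args) {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"record\",\"fields\":["
                + "{\"name\":\"country\",\"type\":[\"null\",\"string\"]},"
                + "{\"name\":\"pageViews\",\"type\":\"long\"}]}");
        // union types are unwrapped before the Pinot data type lookup
        System.out.println(ThirdeyeAvroUtils.getDataTypeForField("country", schema));   // STRING
        System.out.println(ThirdeyeAvroUtils.getDataTypeForField("pageViews", schema)); // LONG
      }
    }
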
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyePinotSchemaUtils.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyePinotSchemaUtils.java
deleted file mode 100644
index 613280629a..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/util/ThirdeyePinotSchemaUtils.java
+++ /dev/null
@@ -1,127 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.util;
-
-import java.io.IOException;
-import java.util.Set;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.linkedin.pinot.common.data.DimensionFieldSpec;
-import com.linkedin.pinot.common.data.FieldSpec;
-import com.linkedin.pinot.common.data.FieldSpec.DataType;
-import com.linkedin.pinot.common.data.MetricFieldSpec;
-import com.linkedin.pinot.common.data.Schema;
-import com.linkedin.pinot.common.data.TimeFieldSpec;
-import com.linkedin.pinot.common.data.TimeGranularitySpec;
-import com.linkedin.thirdeye.hadoop.config.DimensionSpec;
-import com.linkedin.thirdeye.hadoop.config.MetricSpec;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-
-/**
- * This class contains the methods needed to transform
- * a ThirdEyeConfig into a Pinot Schema
- */
-public class ThirdeyePinotSchemaUtils {
-
-  private static Logger LOGGER = LoggerFactory.getLogger(ThirdeyePinotSchemaUtils.class);
-
-  /**
-   * Transforms the thirdeyeConfig to pinot schema
-   * Adds default __COUNT metric if not already present
-   * Adds additional columns for all dimensions which
-   * are either specified as topk or whitelist
-   * and hence have a transformed new column_raw
-   * @param thirdeyeConfig
-   * @return
-   */
-  public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
-    Schema schema = new Schema();
-
-    Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
-    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
-      FieldSpec fieldSpec = new DimensionFieldSpec();
-      String dimensionName = dimensionSpec.getName();
-      fieldSpec.setName(dimensionName);
-      fieldSpec.setDataType(DataType.valueOf(dimensionSpec.getDimensionType().toString()));
-      fieldSpec.setSingleValueField(true);
-      schema.addField(fieldSpec);
-
-      if (transformDimensions.contains(dimensionName)) {
-        fieldSpec = new DimensionFieldSpec();
-        dimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
-        fieldSpec.setName(dimensionName);
-        fieldSpec.setDataType(DataType.STRING);
-        fieldSpec.setSingleValueField(true);
-        schema.addField(fieldSpec);
-      }
-    }
-    boolean countIncluded = false;
-    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
-      FieldSpec fieldSpec = new MetricFieldSpec();
-      String metricName = metricSpec.getName();
-      if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
-        countIncluded = true;
-      }
-      fieldSpec.setName(metricName);
-      fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
-      fieldSpec.setSingleValueField(true);
-      schema.addField(fieldSpec);
-    }
-    if (!countIncluded) {
-      FieldSpec fieldSpec = new MetricFieldSpec();
-      String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
-      fieldSpec.setName(metricName);
-      fieldSpec.setDataType(DataType.LONG);
-      fieldSpec.setDefaultNullValue(1);
-      schema.addField(fieldSpec);
-    }
-    TimeGranularitySpec incoming =
-        new TimeGranularitySpec(DataType.LONG,
-            thirdeyeConfig.getTime().getTimeGranularity().getSize(),
-            thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
-            thirdeyeConfig.getTime().getTimeFormat(),
-            thirdeyeConfig.getTime().getColumnName());
-    TimeGranularitySpec outgoing =
-        new TimeGranularitySpec(DataType.LONG,
-            thirdeyeConfig.getTime().getTimeGranularity().getSize(),
-            thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
-            thirdeyeConfig.getTime().getTimeFormat(),
-            thirdeyeConfig.getTime().getColumnName());
-
-    schema.addField(new TimeFieldSpec(incoming, outgoing));
-
-    schema.setSchemaName(thirdeyeConfig.getCollection());
-
-    return schema;
-  }
-
-  public static Schema createSchema(String configPath) throws IOException {
-    FileSystem fs = FileSystem.get(new Configuration());
-
-    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.decode(fs.open(new Path(configPath)));
-    LOGGER.info("{}", thirdeyeConfig);
-
-    return createSchema(thirdeyeConfig);
-  }
-
-
-}
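
ThirdeyePinotSchemaUtils.createSchema maps the ThirdEye config onto a Pinot schema: each dimension becomes a single-value DimensionFieldSpec, topk/whitelist dimensions get an extra STRING column with the topk suffix, each metric becomes a MetricFieldSpec, a default count metric (LONG, default value 1) is added when missing, and the time column becomes a TimeFieldSpec. A usage sketch, assuming the class is re-added unchanged under the org.apache.pinot.thirdeye.hadoop.util package and that the Pinot Schema class moves to org.apache.pinot.common.data (the HDFS path is illustrative):

    import org.apache.pinot.common.data.Schema;
    import org.apache.pinot.thirdeye.hadoop.util.ThirdeyePinotSchemaUtils;

    // Builds the Pinot schema for a collection from its ThirdEye config stored on HDFS.
    public class SchemaGenerationExample {
      public static Schema buildSchema() throws Exception {
        return ThirdeyePinotSchemaUtils.createSchema("/thirdeye/myCollection/thirdeye_config.json");
      }
    }
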
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/DefaultWaitUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/DefaultWaitUDF.java
deleted file mode 100644
index 7da017a816..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/DefaultWaitUDF.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.wait;
-
-import java.util.Properties;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class DefaultWaitUDF implements WaitUDF {
-  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultWaitUDF.class);
-
-  private Properties inputConfig;
-
-  public DefaultWaitUDF() {
-
-  }
-
-  @Override
-  public void init(Properties inputConfig) {
-    this.inputConfig = inputConfig;
-  }
-
-  @Override
-  // default implementation always returns complete
-  public boolean checkCompleteness() {
-    return true;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitPhaseJob.java
deleted file mode 100644
index 20414e3bcd..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitPhaseJob.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.wait;
-
-import java.lang.reflect.Constructor;
-import java.util.Properties;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static com.linkedin.thirdeye.hadoop.wait.WaitPhaseJobConstants.*;
-
-public class WaitPhaseJob {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(WaitPhaseJob.class);
-
-  private String name;
-  private Properties props;
-
-  public WaitPhaseJob(String name, Properties props) {
-    this.name = name;
-    this.props = props;
-  }
-
-  public void run() {
-
-    try {
-      String thirdeyeWaitClass = getAndCheck(WAIT_UDF_CLASS.toString());
-
-      if (thirdeyeWaitClass != null) {
-        LOGGER.info("Initializing class {}", thirdeyeWaitClass);
-        Constructor<?> constructor = Class.forName(thirdeyeWaitClass).getConstructor();
-        WaitUDF waitUdf = (WaitUDF) constructor.newInstance();
-        waitUdf.init(props);
-
-        boolean complete = waitUdf.checkCompleteness();
-        if (!complete) {
-          throw new RuntimeException("Input folder {} has not received all records");
-        }
-      }
-    }catch (Exception e) {
-      LOGGER.error("Exception in waiting for inputs", e);
-    }
-  }
-
-  private String getAndCheck(String propName) {
-    String propValue = props.getProperty(propName);
-    if (propValue == null) {
-      throw new IllegalArgumentException(propName + " required property");
-    }
-    return propValue;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitPhaseJobConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitPhaseJobConstants.java
deleted file mode 100644
index 08781db7f4..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitPhaseJobConstants.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.wait;
-
-public enum WaitPhaseJobConstants {
-  WAIT_UDF_CLASS("wait.udf.class"),
-  WAIT_POLL_TIMEOUT("wait.poll.timeout"),
-  WAIT_POLL_FREQUENCY("wait.poll.frequency");
-
-  String name;
-
-  WaitPhaseJobConstants(String name) {
-    this.name = name;
-  }
-
-  public String toString() {
-    return name;
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitUDF.java
deleted file mode 100644
index a41a6c7c34..0000000000
--- a/thirdeye/thirdeye-hadoop/src/main/java/com/linkedin/thirdeye/hadoop/wait/WaitUDF.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.wait;
-
-import java.util.Properties;
-
-
-/**
- * Simple interface to check completeness of input folder
- */
-public interface WaitUDF {
-
-  /**
-   * Initializes by providing input configs.
-   * @param inputConfig
-   */
-  void init(Properties inputConfig);
-
-  /**
-   * @return completeness status
-   * @throws IOException
-   * @throws
-   */
-   boolean checkCompleteness() throws Exception;
-}
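
WaitPhaseJob (removed above and re-added under the new package) instantiates the class named by the wait.udf.class property through its no-arg constructor, calls init(props), and expects checkCompleteness() to return true once the inputs are ready. A sketch of a custom implementation that waits for a _SUCCESS marker file (the class name and the input.folder property key are illustrative):

    import java.util.Properties;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.pinot.thirdeye.hadoop.wait.WaitUDF;

    // Reports completeness once the configured input folder contains a _SUCCESS marker.
    public class SuccessFileWaitUDF implements WaitUDF {
      private Properties inputConfig;

      @Override
      public void init(Properties inputConfig) {
        this.inputConfig = inputConfig;
      }

      @Override
      public boolean checkCompleteness() throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path marker = new Path(inputConfig.getProperty("input.folder"), "_SUCCESS");
        return fs.exists(marker);
      }
    }
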
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/ThirdEyeJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/ThirdEyeJob.java
new file mode 100644
index 0000000000..c005a5ad5d
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/ThirdEyeJob.java
@@ -0,0 +1,455 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobStatus;
+import org.joda.time.DateTime;
+import org.joda.time.format.ISODateTimeFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseConstants;
+import org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseJob;
+import org.apache.pinot.thirdeye.hadoop.backfill.BackfillPhaseConstants;
+import org.apache.pinot.thirdeye.hadoop.backfill.BackfillPhaseJob;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+import org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants;
+import org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseJob;
+import org.apache.pinot.thirdeye.hadoop.join.JoinPhaseJob;
+import org.apache.pinot.thirdeye.hadoop.push.SegmentPushPhase;
+import org.apache.pinot.thirdeye.hadoop.push.SegmentPushPhaseConstants;
+import org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants;
+import org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseJob;
+import org.apache.pinot.thirdeye.hadoop.topk.TopKPhaseConstants;
+import org.apache.pinot.thirdeye.hadoop.topk.TopKPhaseJob;
+import org.apache.pinot.thirdeye.hadoop.transform.TransformPhaseJob;
+import org.apache.pinot.thirdeye.hadoop.wait.WaitPhaseJob;
+
+/**
+ * Wrapper that selects and runs the ThirdEye Hadoop phases (segment creation, segment push, aggregation, etc.) by phase name
+ */
+public class ThirdEyeJob {
+  private static final Logger LOGGER = LoggerFactory.getLogger(ThirdEyeJob.class);
+
+  private static final String USAGE = "usage: phase_name job.properties";
+
+  private final String phaseName;
+  private final Properties inputConfig;
+
+  public ThirdEyeJob(String jobName, Properties config) {
+    String phaseFromConfig = config.getProperty(ThirdEyeJobProperties.THIRDEYE_PHASE.getName());
+    if (phaseFromConfig != null) {
+      this.phaseName = phaseFromConfig;
+    } else {
+      this.phaseName = jobName;
+    }
+    this.inputConfig = config;
+  }
+
+  private enum PhaseSpec {
+
+    BACKFILL {
+      @Override
+      Class<?> getKlazz() {
+        return BackfillPhaseJob.class;
+      }
+
+      @Override
+      String getDescription() {
+        return "Backfills older pinot segments with star tree index and topk information";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+          DateTime minTime, DateTime maxTime, String inputPaths)
+              throws Exception {
+        Properties config = new Properties();
+
+        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_CONTROLLER_HOST.toString(),
+            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_HOSTS.getName()));
+        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_CONTROLLER_PORT.toString(),
+            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_PORT.getName()));
+
+        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_START_TIME.toString(),
+            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_BACKFILL_START_TIME.getName()));
+        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_END_TIME.toString(),
+            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_BACKFILL_END_TIME.getName()));
+
+        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_OUTPUT_PATH.toString(),
+            getIndexDir(root, collection, minTime, maxTime) + File.separator + BACKFILL.getName());
+        config.setProperty(BackfillPhaseConstants.BACKFILL_PHASE_TABLE_NAME.toString(), collection);
+
+        return config;
+      }
+    },
+    WAIT {
+      @Override
+      Class<?> getKlazz() {
+        return null;
+      }
+
+      @Override
+      String getDescription() {
+        return "Polls a pre-determined amount of time for the existence of input paths";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+          DateTime minTime, DateTime maxTime, String inputPaths)
+              throws Exception {
+        return null;
+      }
+    },
+    JOIN {
+      @Override
+      Class<?> getKlazz() {
+        return JoinPhaseJob.class;
+      }
+
+      @Override
+      String getDescription() {
+        return "Joins multiple data sets based on join key";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+          DateTime minTime, DateTime maxTime, String inputPaths) {
+        return inputConfig;
+      }
+    },
+    TRANSFORM {
+      @Override
+      Class<?> getKlazz() {
+        return TransformPhaseJob.class;
+      }
+
+      @Override
+      String getDescription() {
+        return "Transforms avro record";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+          DateTime minTime, DateTime maxTime, String inputPaths) {
+        return inputConfig;
+      }
+    },
+    AGGREGATION {
+      @Override
+      Class<?> getKlazz() {
+        return AggregationPhaseJob.class;
+      }
+
+      @Override
+      String getDescription() {
+        return "Aggregates input avro data to another time granularity";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+          DateTime minTime, DateTime maxTime, String inputPaths)
+              throws Exception {
+        Properties config = new Properties();
+
+        config.setProperty(AggregationPhaseConstants.AGG_PHASE_INPUT_PATH.toString(),
+            inputPaths);
+        config.setProperty(AggregationPhaseConstants.AGG_PHASE_OUTPUT_PATH.toString(),
+            getIndexDir(root, collection, minTime, maxTime) + File.separator
+                + AGGREGATION.getName());
+
+        return config;
+      }
+    },
+    TOPK {
+      @Override
+      Class<?> getKlazz() {
+        return TopKPhaseJob.class;
+      }
+
+      @Override
+      String getDescription() {
+        return "Topk";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+          DateTime minTime, DateTime maxTime, String inputPaths)
+              throws Exception {
+        Properties config = new Properties();
+
+        Path aggOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator + AGGREGATION.getName());
+        FileSystem fs = FileSystem.get(new Configuration());
+        if (fs.exists(aggOutputPath)) {
+          inputPaths = aggOutputPath.toString();
+        }
+        config.setProperty(TopKPhaseConstants.TOPK_PHASE_INPUT_PATH.toString(),
+            inputPaths);
+        config.setProperty(TopKPhaseConstants.TOPK_PHASE_OUTPUT_PATH.toString(),
+            getIndexDir(root, collection, minTime, maxTime) + File.separator
+                + TOPK.getName());
+
+        return config;
+      }
+    },
+    DERIVED_COLUMN_TRANSFORMATION {
+      @Override
+      Class<?> getKlazz() {
+        return DerivedColumnTransformationPhaseJob.class;
+      }
+
+      @Override
+      String getDescription() {
+        return "Adds new columns for dimensions with topk or whitelist";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+          DateTime minTime, DateTime maxTime, String inputPaths)
+              throws Exception {
+        Properties config = new Properties();
+
+        Path aggOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator + AGGREGATION.getName());
+        FileSystem fs = FileSystem.get(new Configuration());
+        if (fs.exists(aggOutputPath)) {
+          inputPaths = aggOutputPath.toString();
+        }
+        config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH.toString(),
+            inputPaths);
+        config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(),
+            getIndexDir(root, collection, minTime, maxTime));
+        config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH.toString(),
+            getIndexDir(root, collection, minTime, maxTime) + File.separator
+              + DERIVED_COLUMN_TRANSFORMATION.getName());
+        config.setProperty(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString(),
+            getIndexDir(root, collection, minTime, maxTime) + File.separator + TOPK.getName());
+
+        return config;
+      }
+    },
+    SEGMENT_CREATION {
+      @Override
+      Class<?> getKlazz() {
+        return SegmentCreationPhaseJob.class;
+      }
+
+      @Override
+      String getDescription() {
+        return "Generates pinot segments";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+          DateTime minTime, DateTime maxTime, String inputPaths)
+              throws Exception {
+        Properties config = new Properties();
+
+        Path derivedOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator +
+            DERIVED_COLUMN_TRANSFORMATION.getName());
+        Path aggregationOutputPath = new Path(getIndexDir(root, collection, minTime, maxTime) + File.separator +
+            AGGREGATION.getName());
+        FileSystem fs = FileSystem.get(new Configuration());
+        if (fs.exists(derivedOutputPath)) {
+          inputPaths = derivedOutputPath.toString();
+        } else if (fs.exists(aggregationOutputPath)) {
+          inputPaths = aggregationOutputPath.toString();
+        }
+
+        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_INPUT_PATH.toString(), inputPaths);
+        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_OUTPUT_PATH.toString(),
+            getIndexDir(root, collection, minTime, maxTime) + File.separator + SEGMENT_CREATION.getName());
+        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_START_TIME.toString(),
+            String.valueOf(minTime.getMillis()));
+        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_END_TIME.toString(),
+            String.valueOf(maxTime.getMillis()));
+
+        String schedule = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_FLOW_SCHEDULE.getName());
+        config.setProperty(SegmentCreationPhaseConstants.SEGMENT_CREATION_SCHEDULE.toString(), schedule);
+        return config;
+      }
+    },
+    SEGMENT_PUSH {
+      @Override
+      Class<?> getKlazz() {
+        return SegmentPushPhase.class;
+      }
+
+      @Override
+      String getDescription() {
+        return "Pushes pinot segments to pinot controller";
+      }
+
+      @Override
+      Properties getJobProperties(Properties inputConfig, String root, String collection,
+           DateTime minTime, DateTime maxTime, String inputPaths)
+              throws Exception {
+        Properties config = new Properties();
+
+        config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_INPUT_PATH.toString(),
+            getIndexDir(root, collection, minTime, maxTime) + File.separator + SEGMENT_CREATION.getName());
+        config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_CONTROLLER_HOSTS.toString(),
+            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_HOSTS.getName()));
+        config.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_CONTROLLER_PORT.toString(),
+            inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_PORT.getName()));
+        return config;
+      }
+    };
+
+    abstract Class<?> getKlazz();
+
+    abstract String getDescription();
+
+    abstract Properties getJobProperties(Properties inputConfig, String root, String collection,
+        DateTime minTime, DateTime maxTime, String inputPaths) throws Exception;
+
+    String getName() {
+      return this.name().toLowerCase();
+    }
+
+    String getIndexDir(String root, String collection, DateTime minTime,
+        DateTime maxTime) throws IOException {
+      return getCollectionDir(root, collection) + File.separator
+          + "data_" + ThirdEyeConstants.DATE_TIME_FORMATTER.print(minTime) + "_"
+          + ThirdEyeConstants.DATE_TIME_FORMATTER.print(maxTime);
+    }
+
+  }
+
+  private static void usage() {
+    System.err.println(USAGE);
+    for (PhaseSpec phase : PhaseSpec.values()) {
+      System.err.printf("%-30s : %s\n", phase.getName(), phase.getDescription());
+    }
+  }
+
+  private static String getAndCheck(String name, Properties properties) {
+    String value = properties.getProperty(name);
+    if (value == null) {
+      throw new IllegalArgumentException("Must provide " + name);
+    }
+    return value;
+  }
+
+
+  private static String getCollectionDir(String root, String collection) {
+    return root == null ? collection : root + File.separator + collection;
+  }
+
+  private void setMapreduceConfig(Configuration configuration) {
+    String mapreduceConfig =
+        inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_MR_CONF.getName());
+    if (mapreduceConfig != null && !mapreduceConfig.isEmpty()) {
+      String[] options = mapreduceConfig.split(",");
+      for (String option : options) {
+        String[] configs = option.split("=", 2);
+        if (configs.length == 2) {
+          LOGGER.info("Setting job configuration {} to {}", configs[0], configs[1]);
+          configuration.set(configs[0], configs[1]);
+        }
+      }
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  public void run() throws Exception {
+    LOGGER.info("Input config:{}", inputConfig);
+    PhaseSpec phaseSpec;
+    try {
+      phaseSpec = PhaseSpec.valueOf(phaseName.toUpperCase());
+    } catch (Exception e) {
+      usage();
+      throw e;
+    }
+
+    if (PhaseSpec.TRANSFORM.equals(phaseSpec)) {
+      TransformPhaseJob job = new TransformPhaseJob("Transform Job", inputConfig);
+      job.run();
+      return;
+
+    } else if (PhaseSpec.JOIN.equals(phaseSpec)) {
+      JoinPhaseJob job = new JoinPhaseJob("Join Job", inputConfig);
+      job.run();
+      return;
+
+    } else if (PhaseSpec.WAIT.equals(phaseSpec)) {
+      WaitPhaseJob job = new WaitPhaseJob("Wait for inputs", inputConfig);
+      job.run();
+      return;
+    }
+
+    // Get root, collection, input paths
+    String root = getAndCheck(ThirdEyeJobProperties.THIRDEYE_ROOT.getName(), inputConfig);
+    String collection =
+        getAndCheck(ThirdEyeJobProperties.THIRDEYE_COLLECTION.getName(), inputConfig);
+    String inputPaths = getAndCheck(ThirdEyeJobProperties.INPUT_PATHS.getName(), inputConfig);
+
+    // Get min / max time
+    DateTime minTime;
+    DateTime maxTime;
+
+    String minTimeProp = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_TIME_MIN.getName());
+    String maxTimeProp = inputConfig.getProperty(ThirdEyeJobProperties.THIRDEYE_TIME_MAX.getName());
+
+    minTime = ISODateTimeFormat.dateTimeParser().parseDateTime(minTimeProp);
+    maxTime = ISODateTimeFormat.dateTimeParser().parseDateTime(maxTimeProp);
+
+    Properties jobProperties = phaseSpec.getJobProperties(inputConfig, root, collection,
+        minTime, maxTime, inputPaths);
+    for (Object key : inputConfig.keySet()) {
+      jobProperties.setProperty(key.toString(), inputConfig.getProperty(key.toString()));
+    }
+
+    // Instantiate the job
+    Constructor<Configured> constructor = (Constructor<Configured>) phaseSpec.getKlazz()
+        .getConstructor(String.class, Properties.class);
+    Configured instance = constructor.newInstance(phaseSpec.getName(), jobProperties);
+    setMapreduceConfig(instance.getConf());
+
+    // Run the job
+    Method runMethod = instance.getClass().getMethod("run");
+    Job job = (Job) runMethod.invoke(instance);
+    if (job != null) {
+      JobStatus status = job.getStatus();
+      if (status.getState() != JobStatus.State.SUCCEEDED) {
+        throw new RuntimeException(
+            "Job " + job.getJobName() + " failed to execute: Ran with config:" + jobProperties);
+      }
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 2) {
+      usage();
+      System.exit(1);
+    }
+
+    String phaseName = args[0];
+    Properties config = new Properties();
+    config.load(new FileInputStream(args[1]));
+    new ThirdEyeJob(phaseName, config).run();
+  }
+}
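
ThirdEyeJob resolves the phase either from the thirdeye.phase property or from the job name passed in (args[0] when run from main), builds the phase-specific properties, and then reflectively constructs and runs the corresponding job class. A minimal programmatic sketch of launching the aggregation phase (all property values below are illustrative, and a real run also needs the dimension/metric/time properties consumed by ThirdEyeConfig):

    import java.util.Properties;

    import org.apache.pinot.thirdeye.hadoop.ThirdEyeJob;

    // Roughly equivalent to running "aggregation job.properties" through ThirdEyeJob.main.
    public class RunAggregationPhase {
      public static void main(String[] args) throws Exception {
        Properties config = new Properties();
        config.setProperty("thirdeye.root", "/thirdeye");
        config.setProperty("thirdeye.collection", "myCollection");
        config.setProperty("input.paths", "/data/myCollection/avro");
        config.setProperty("thirdeye.time.min", "2018-01-01T00");
        config.setProperty("thirdeye.time.max", "2018-01-02T00");
        new ThirdEyeJob("aggregation", config).run();
      }
    }
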
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/ThirdEyeJobProperties.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/ThirdEyeJobProperties.java
new file mode 100644
index 0000000000..21b67615fd
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/ThirdEyeJobProperties.java
@@ -0,0 +1,42 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop;
+
+public enum ThirdEyeJobProperties {
+  THIRDEYE_FLOW_SCHEDULE("thirdeye.flow.schedule"), // HOURLY, DAILY
+  THIRDEYE_PHASE("thirdeye.phase"), // segment_creation, segment_push
+  THIRDEYE_ROOT("thirdeye.root"),
+  THIRDEYE_COLLECTION("thirdeye.collection"),
+  THIRDEYE_TIME_MIN("thirdeye.time.min"), // YYYY-mm-ddThh
+  THIRDEYE_TIME_MAX("thirdeye.time.max"),
+  INPUT_PATHS("input.paths"),
+  THIRDEYE_MR_CONF("thirdeye.mr.conf"),
+  THIRDEYE_PINOT_CONTROLLER_HOSTS("thirdeye.pinot.controller.hosts"),
+  THIRDEYE_PINOT_CONTROLLER_PORT("thirdeye.pinot.controller.port"),
+  THIRDEYE_BACKFILL_START_TIME("thirdeye.backfill.start.time"),
+  THIRDEYE_BACKFILL_END_TIME("thirdeye.backfill.end.time"),
+  THIRDEYE_NUM_REDUCERS("thirdeye.num.reducers");
+
+  private final String propertyName;
+
+  ThirdEyeJobProperties(String propertyName) {
+    this.propertyName = propertyName;
+  }
+
+  public String getName() {
+    return propertyName;
+  }
+}
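
These enum values are the keys expected in the job.properties file handed to ThirdEyeJob; getName() returns the raw property key. For instance, the backfill and segment push phases read the controller endpoint through these keys, essentially as in this small lookup sketch:

    import java.util.Properties;

    import org.apache.pinot.thirdeye.hadoop.ThirdEyeJobProperties;

    // Resolves the Pinot controller host list and port from the job properties.
    public class ControllerConfigExample {
      public static void printControllerConfig(Properties props) {
        String hosts = props.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_HOSTS.getName());
        String port = props.getProperty(ThirdEyeJobProperties.THIRDEYE_PINOT_CONTROLLER_PORT.getName());
        System.out.println("controller hosts=" + hosts + " port=" + port);
      }
    }
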
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java
new file mode 100644
index 0000000000..b48064f3be
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseConfig.java
@@ -0,0 +1,109 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.aggregation;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pinot.thirdeye.hadoop.config.DimensionSpec;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+import org.apache.pinot.thirdeye.hadoop.config.MetricSpec;
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.config.TimeSpec;
+
+/**
+ * This class contains the config needed by the aggregation phase
+ * and the methods to derive that config from a ThirdEyeConfig
+ */
+public class AggregationPhaseConfig {
+  private List<String> dimensionNames;
+  private List<DimensionType> dimensionTypes;
+  private List<String> metricNames;
+  private List<MetricType> metricTypes;
+  private TimeSpec time;
+  private TimeSpec inputTime;
+
+  public AggregationPhaseConfig() {
+
+  }
+
+  public AggregationPhaseConfig(List<String> dimensionNames, List<String> metricNames,
+      List<DimensionType> dimensionTypes, List<MetricType> metricTypes, TimeSpec time, TimeSpec inputTime) {
+    super();
+    this.dimensionNames = dimensionNames;
+    this.dimensionTypes = dimensionTypes;
+    this.metricNames = metricNames;
+    this.metricTypes = metricTypes;
+    this.time = time;
+    this.inputTime = inputTime;
+  }
+
+  public List<String> getDimensionNames() {
+    return dimensionNames;
+  }
+
+  public List<DimensionType> getDimensionTypes() {
+    return dimensionTypes;
+  }
+
+  public List<String> getMetricNames() {
+    return metricNames;
+  }
+
+  public List<MetricType> getMetricTypes() {
+    return metricTypes;
+  }
+
+  public TimeSpec getTime() {
+    return time;
+  }
+
+  public TimeSpec getInputTime() {
+    return inputTime;
+  }
+
+  public static AggregationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
+
+    // metrics
+    List<String> metricNames = new ArrayList<>(config.getMetrics().size());
+    List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
+    for (MetricSpec spec : config.getMetrics()) {
+      metricNames.add(spec.getName());
+      metricTypes.add(spec.getType());
+    }
+
+    // dimensions
+    List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
+    List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
+    for (DimensionSpec spec : config.getDimensions()) {
+      dimensionNames.add(spec.getName());
+      dimensionTypes.add(spec.getDimensionType());
+    }
+
+    // time
+    TimeSpec time = config.getTime();
+
+    // input time
+    TimeSpec inputTime = config.getInputTime();
+    if (inputTime == null) {
+      throw new IllegalStateException("Must provide input time configs for aggregation job");
+    }
+
+    return new AggregationPhaseConfig(dimensionNames, metricNames, dimensionTypes, metricTypes, time, inputTime);
+  }
+
+}
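
AggregationPhaseConfig flattens the ThirdEyeConfig into the parallel name/type lists that the aggregation mapper and reducer iterate over, and it insists on an input time spec because the job rolls records up from the input granularity to the output granularity. A small sketch of deriving and inspecting it:

    import org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseConfig;
    import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;

    // Derives the flattened dimension/metric name and type lists from a ThirdEyeConfig.
    public class AggregationConfigExample {
      public static void describe(ThirdEyeConfig thirdeyeConfig) {
        AggregationPhaseConfig config = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
        System.out.println("dimensions: " + config.getDimensionNames());
        System.out.println("metrics: " + config.getMetricNames());
        System.out.println("output granularity: " + config.getTime().getTimeGranularity());
      }
    }
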
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java
new file mode 100644
index 0000000000..7e2b855f74
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseConstants.java
@@ -0,0 +1,37 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.aggregation;
+/**
+ * This class contains the properties to be set for the aggregation phase
+ */
+public enum AggregationPhaseConstants {
+
+  AGG_PHASE_INPUT_PATH("aggregation.phase.input.path"),
+  AGG_PHASE_AVRO_SCHEMA("aggregation.phase.avro.schema"),
+  AGG_PHASE_OUTPUT_PATH("aggregation.phase.output.path"),
+  AGG_PHASE_THIRDEYE_CONFIG("aggregation.phase.thirdeye.config");
+
+  String name;
+
+  AggregationPhaseConstants(String name) {
+    this.name = name;
+  }
+
+  public String toString() {
+    return name;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseJob.java
new file mode 100644
index 0000000000..c1a89cf6b2
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseJob.java
@@ -0,0 +1,360 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.aggregation;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.avro.mapreduce.AvroKeyOutputFormat;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.pinot.thirdeye.hadoop.ThirdEyeJobProperties;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfigProperties;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+import org.apache.pinot.thirdeye.hadoop.config.TimeGranularity;
+import org.apache.pinot.thirdeye.hadoop.config.TimeSpec;
+import org.apache.pinot.thirdeye.hadoop.util.ThirdeyeAggregateMetricUtils;
+import org.apache.pinot.thirdeye.hadoop.util.ThirdeyeAvroUtils;
+
+import static org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseConstants.*;
+
+/**
+ * Buckets input avro data according to the granularity specified in the config and aggregates the metrics
+ * Mapper:
+ * Converts the time column value into the aggregation bucket granularity
+ * Reducer:
+ * Aggregates all records that share the same dimensions within one time bucket
+ */
+public class AggregationPhaseJob extends Configured {
+  private static final Logger LOGGER = LoggerFactory.getLogger(AggregationPhaseJob.class);
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  private String name;
+  private Properties props;
+
+  public AggregationPhaseJob(String name, Properties props) {
+    super(new Configuration());
+    this.name = name;
+    this.props = props;
+  }
+
+  public static class AggregationMapper extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
+
+    private ThirdEyeConfig thirdeyeConfig;
+    private AggregationPhaseConfig config;
+    private List<String> dimensionNames;
+    private List<DimensionType> dimensionTypes;
+    private List<String> metricNames;
+    List<MetricType> metricTypes;
+    private int numMetrics;
+    private String timeColumnName;
+    private TimeGranularity inputGranularity;
+    private TimeGranularity aggregateGranularity;
+    private BytesWritable keyWritable;
+    private BytesWritable valWritable;
+    private int numRecords;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+      LOGGER.info("AggregationPhaseJob.AggregationPhaseMapper.setup()");
+      Configuration configuration = context.getConfiguration();
+
+      thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(AGG_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
+      config = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
+      dimensionNames = config.getDimensionNames();
+      dimensionTypes = config.getDimensionTypes();
+      metricNames = config.getMetricNames();
+      numMetrics = metricNames.size();
+      metricTypes = config.getMetricTypes();
+      timeColumnName = config.getTime().getColumnName();
+      inputGranularity = config.getInputTime().getTimeGranularity();
+      aggregateGranularity = config.getTime().getTimeGranularity();
+      keyWritable = new BytesWritable();
+      valWritable = new BytesWritable();
+      numRecords = 0;
+    }
+
+    @Override
+    public void map(AvroKey<GenericRecord> record, NullWritable value, Context context) throws IOException, InterruptedException {
+
+      // input record
+      GenericRecord inputRecord = record.datum();
+
+      // dimensions
+      List<Object> dimensions = new ArrayList<>();
+      for (String dimension : dimensionNames) {
+        Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimension);
+        dimensions.add(dimensionValue);
+      }
+
+      // metrics
+      Number[] metrics = new Number[numMetrics];
+      for (int i = 0; i < numMetrics; i++) {
+        Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricNames.get(i), metricTypes.get(i));
+        metrics[i] = metricValue;
+      }
+
+      // time
+      long timeValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName).longValue();
+      long inputTimeMillis = inputGranularity.toMillis(timeValue);
+      long bucketTime = aggregateGranularity.convertToUnit(inputTimeMillis);
+
+      AggregationPhaseMapOutputKey keyWrapper = new AggregationPhaseMapOutputKey(bucketTime, dimensions, dimensionTypes);
+      byte[] keyBytes = keyWrapper.toBytes();
+      keyWritable.set(keyBytes, 0, keyBytes.length);
+
+      AggregationPhaseMapOutputValue valWrapper = new AggregationPhaseMapOutputValue(metrics, metricTypes);
+      byte[] valBytes = valWrapper.toBytes();
+      valWritable.set(valBytes, 0, valBytes.length);
+
+      numRecords ++;
+      context.write(keyWritable, valWritable);
+    }
+
+    @Override
+    public void cleanup(Context context) throws IOException, InterruptedException {
+      context.getCounter(AggregationCounter.NUMBER_OF_RECORDS).increment(numRecords);
+    }
+  }
+
+  public static class AggregationReducer
+      extends Reducer<BytesWritable, BytesWritable, AvroKey<GenericRecord>, NullWritable> {
+
+    private Schema avroSchema;
+    private ThirdEyeConfig thirdeyeConfig;
+    private AggregationPhaseConfig config;
+    private List<String> dimensionsNames;
+    private List<DimensionType> dimensionTypes;
+    private List<String> metricNames;
+    List<MetricType> metricTypes;
+    private int numMetrics;
+    private TimeSpec time;
+    private int numRecords;
+    private Number[] metricSums;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+      LOGGER.info("AggregationPhaseJob.AggregationPhaseReducer.setup()");
+      Configuration configuration = context.getConfiguration();
+
+      thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(AGG_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
+      config = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
+      dimensionsNames = config.getDimensionNames();
+      dimensionTypes = config.getDimensionTypes();
+      metricNames = config.getMetricNames();
+      numMetrics = metricNames.size();
+      metricTypes = config.getMetricTypes();
+      time = config.getTime();
+      avroSchema = new Schema.Parser().parse(configuration.get(AGG_PHASE_AVRO_SCHEMA.toString()));
+      numRecords = 0;
+      metricSums = new Number[numMetrics];
+      Arrays.fill(metricSums, 0);
+    }
+
+    @Override
+    public void reduce(BytesWritable aggregationKey, Iterable<BytesWritable> values,
+        Context context) throws IOException, InterruptedException {
+
+      // output record
+      GenericRecord outputRecord = new Record(avroSchema);
+
+      AggregationPhaseMapOutputKey keyWrapper = AggregationPhaseMapOutputKey.fromBytes(aggregationKey.getBytes(), dimensionTypes);
+
+      // time
+      long timeValue = keyWrapper.getTime();
+      outputRecord.put(time.getColumnName(), timeValue);
+
+      // dimensions
+      List<Object> dimensionValues = keyWrapper.getDimensionValues();
+      for (int i = 0; i < dimensionsNames.size(); i++) {
+        String dimensionName = dimensionsNames.get(i);
+        Object dimensionValue = dimensionValues.get(i);
+        outputRecord.put(dimensionName, dimensionValue);
+      }
+
+      // aggregate metrics
+      Number[] aggMetricValues = new Number[numMetrics];
+      Arrays.fill(aggMetricValues, 0);
+      for (BytesWritable value : values) {
+        Number[] metricValues = AggregationPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes).getMetricValues();
+        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
+      }
+      ThirdeyeAggregateMetricUtils.aggregate(metricTypes, metricSums, aggMetricValues);
+
+      // metrics
+      for (int i = 0; i < numMetrics; i++) {
+        String metricName = metricNames.get(i);
+        Number metricValue = aggMetricValues[i];
+        outputRecord.put(metricName, metricValue);
+      }
+
+      numRecords ++;
+      AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
+      context.write(outputKey, NullWritable.get());
+    }
+
+    @Override
+    public void cleanup(Context context) throws IOException, InterruptedException {
+      context.getCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED).increment(numRecords);
+      for (int i = 0; i < numMetrics; i++) {
+        context.getCounter(thirdeyeConfig.getCollection(), metricNames.get(i)).increment(metricSums[i].longValue());
+      }
+    }
+  }
+
+  public Job run() throws Exception {
+    Job job = Job.getInstance(getConf());
+    job.setJobName(name);
+    job.setJarByClass(AggregationPhaseJob.class);
+
+    FileSystem fs = FileSystem.get(getConf());
+    Configuration configuration = job.getConfiguration();
+
+    // Properties
+    LOGGER.info("Properties {}", props);
+
+     // Input Path
+    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
+    LOGGER.info("Input path dir: " + inputPathDir);
+    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
+      LOGGER.info("Adding input:" + inputPath);
+      Path input = new Path(inputPath);
+      FileInputFormat.addInputPath(job, input);
+    }
+
+    // Output path
+    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
+    LOGGER.info("Output path dir: " + outputPath.toString());
+    if (fs.exists(outputPath)) {
+      fs.delete(outputPath, true);
+    }
+    FileOutputFormat.setOutputPath(job, outputPath);
+
+    // Schema
+    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
+    LOGGER.info("Schema : {}", avroSchema.toString(true));
+    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());
+
+    // ThirdEyeConfig
+    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
+    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
+    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
+    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
+    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
+
+    // Map config
+    job.setMapperClass(AggregationMapper.class);
+    job.setInputFormatClass(AvroKeyInputFormat.class);
+    job.setMapOutputKeyClass(BytesWritable.class);
+    job.setMapOutputValueClass(BytesWritable.class);
+
+    // Reduce config
+    job.setReducerClass(AggregationReducer.class);
+    job.setOutputKeyClass(AvroKey.class);
+    job.setOutputValueClass(NullWritable.class);
+    AvroJob.setOutputKeySchema(job, avroSchema);
+    job.setOutputFormatClass(AvroKeyOutputFormat.class);
+    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
+    LOGGER.info("Num Reducers : {}", numReducers);
+    if (StringUtils.isNotBlank(numReducers)) {
+      job.setNumReduceTasks(Integer.valueOf(numReducers));
+      LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
+    }
+
+    job.waitForCompletion(true);
+
+    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
+    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
+    if (counter.getValue() == 0) {
+      throw new IllegalStateException("No input records in " + inputPathDir);
+    }
+    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
+    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
+
+    for (String metric : thirdeyeConfig.getMetricNames()) {
+      counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
+      LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
+    }
+
+    return job;
+  }
+
+  private String getAndSetConfiguration(Configuration configuration,
+      AggregationPhaseConstants constant) {
+    String value = getAndCheck(constant.toString());
+    configuration.set(constant.toString(), value);
+    return value;
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+  public static enum AggregationCounter {
+    NUMBER_OF_RECORDS,
+    NUMBER_OF_RECORDS_FLATTENED
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("usage: config.properties");
+    }
+
+    Properties props = new Properties();
+    props.load(new FileInputStream(args[0]));
+
+    AggregationPhaseJob job = new AggregationPhaseJob("aggregate_avro_job", props);
+    job.run();
+  }
+}
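
When AggregationPhaseJob is launched on its own (rather than through ThirdEyeJob), the properties must already contain aggregation.phase.input.path and aggregation.phase.output.path, plus the dimension/metric/time properties that ThirdEyeConfig.fromProperties expects (their keys live in ThirdEyeConfigProperties, not shown in this part of the diff). A hypothetical launcher sketch with illustrative paths:

    import java.util.Properties;

    import org.apache.hadoop.mapreduce.Job;
    import org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseJob;

    // Configures and runs the aggregation phase directly; the paths are illustrative
    // and the ThirdEyeConfig properties still need to be added to props.
    public class AggregationJobLauncher {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("aggregation.phase.input.path", "/data/myCollection/avro");
        props.setProperty("aggregation.phase.output.path", "/thirdeye/myCollection/aggregation");
        // ... plus the thirdeye dimension/metric/time properties from ThirdEyeConfigProperties
        Job job = new AggregationPhaseJob("aggregate_avro_job", props).run();
        System.out.println("aggregation succeeded: " + job.isSuccessful());
      }
    }
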
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java
new file mode 100644
index 0000000000..057a9469c9
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputKey.java
@@ -0,0 +1,111 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.aggregation;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+
+/**
+ * Wrapper for the key emitted by the mapper in the aggregation phase
+ */
+public class AggregationPhaseMapOutputKey {
+
+  private long time;
+  private List<Object> dimensionValues;
+  private List<DimensionType> dimensionTypes;
+
+  public AggregationPhaseMapOutputKey(long time, List<Object> dimensionValues, List<DimensionType> dimensionTypes) {
+    this.time = time;
+    this.dimensionValues = dimensionValues;
+    this.dimensionTypes = dimensionTypes;
+  }
+
+  public long getTime() {
+    return time;
+  }
+
+  public List<Object> getDimensionValues() {
+    return dimensionValues;
+  }
+
+  public List<DimensionType> getDimensionTypes() {
+    return dimensionTypes;
+  }
+
+  /**
+   * Serializes this AggregationPhaseMapOutputKey to a byte buffer
+   * @return the serialized bytes
+   * @throws IOException
+   */
+  public byte[] toBytes() throws IOException {
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+
+    // time
+    dos.writeLong(time);
+
+    // dimensions size
+    dos.writeInt(dimensionValues.size());
+    // dimension values
+    for (int i = 0; i < dimensionValues.size(); i++) {
+      Object dimensionValue = dimensionValues.get(i);
+      DimensionType dimensionType = dimensionTypes.get(i);
+      DimensionType.writeDimensionValueToOutputStream(dos, dimensionValue, dimensionType);
+    }
+
+    baos.close();
+    dos.close();
+    return baos.toByteArray();
+  }
+
+  /**
+   * Constructs AggregationPhaseMapOutputKey from bytes buffer
+   * @param buffer
+   * @param dimensionTypes
+   * @return
+   * @throws IOException
+   */
+  public static AggregationPhaseMapOutputKey fromBytes(byte[] buffer, List<DimensionType> dimensionTypes) throws IOException {
+    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
+
+    // time
+    long time = dis.readLong();
+
+    // dimensions size
+    int size = dis.readInt();
+
+    // dimension value
+    List<Object> dimensionValues = new ArrayList<>();
+    for (int i = 0; i < size; i++) {
+      DimensionType dimensionType = dimensionTypes.get(i);
+      Object dimensionValue = DimensionType.readDimensionValueFromDataInputStream(dis, dimensionType);
+      dimensionValues.add(dimensionValue);
+    }
+
+    AggregationPhaseMapOutputKey wrapper;
+    wrapper = new AggregationPhaseMapOutputKey(time, dimensionValues, dimensionTypes);
+    return wrapper;
+  }
+
+}
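
The key wrapper serializes the bucketed time plus the typed dimension values into a compact byte array, which the mapper emits as a BytesWritable; fromBytes needs the same dimension type list to decode it. A round-trip sketch (the dimension values are illustrative, and DimensionType.STRING is assumed to be one of the enum's constants):

    import java.util.Arrays;
    import java.util.List;

    import org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseMapOutputKey;
    import org.apache.pinot.thirdeye.hadoop.config.DimensionType;

    // Serializes an aggregation key and decodes it back with the same type list.
    public class MapOutputKeyRoundTrip {
      public static void main(String[] args) throws Exception {
        List<DimensionType> types = Arrays.asList(DimensionType.STRING, DimensionType.STRING);
        List<Object> values = Arrays.<Object>asList("US", "mobile");
        AggregationPhaseMapOutputKey key =
            new AggregationPhaseMapOutputKey(1514764800000L, values, types);
        byte[] bytes = key.toBytes();
        AggregationPhaseMapOutputKey decoded = AggregationPhaseMapOutputKey.fromBytes(bytes, types);
        System.out.println(decoded.getTime() + " " + decoded.getDimensionValues());
      }
    }
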
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java
new file mode 100644
index 0000000000..49ba703741
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseMapOutputValue.java
@@ -0,0 +1,92 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.aggregation;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+
+/**
+ * Wrapper for value generated by mapper in AggregationPhase
+ */
+public class AggregationPhaseMapOutputValue {
+
+  private Number[] metricValues;
+  private List<MetricType> metricTypes;
+
+  public AggregationPhaseMapOutputValue(Number[] metricValues, List<MetricType> metricTypes) {
+    this.metricValues = metricValues;
+    this.metricTypes = metricTypes;
+  }
+
+  public Number[] getMetricValues() {
+    return metricValues;
+  }
+
+  /**
+   * Serializes this AggregationPhaseMapOutputValue to a byte array
+   * @return the serialized value bytes
+   * @throws IOException if writing to the underlying stream fails
+   */
+  public byte[] toBytes() throws IOException {
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+
+    // metric values
+    dos.writeInt(metricValues.length);
+    for (int i = 0; i < metricValues.length; i++) {
+      Number number = metricValues[i];
+      MetricType metricType = metricTypes.get(i);
+      MetricType.writeMetricValueToDataOutputStream(dos, number, metricType);
+    }
+
+    baos.close();
+    dos.close();
+    return baos.toByteArray();
+  }
+
+  /**
+   * Constructs an AggregationPhaseMapOutputValue from a byte array
+   * @param buffer serialized value bytes produced by {@link #toBytes()}
+   * @param metricTypes metric types, in the same order used during serialization
+   * @return the deserialized AggregationPhaseMapOutputValue
+   * @throws IOException if reading from the underlying stream fails
+   */
+  public static AggregationPhaseMapOutputValue fromBytes(byte[] buffer, List<MetricType> metricTypes) throws IOException {
+    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
+
+    // metric values
+    int length = dis.readInt();
+    Number[] metricValues = new Number[length];
+
+    for (int i = 0 ; i < length; i++) {
+      MetricType metricType = metricTypes.get(i);
+      Number metricValue = MetricType.readMetricValueFromDataInputStream(dis, metricType);
+      metricValues[i] = metricValue;
+    }
+
+    AggregationPhaseMapOutputValue wrapper;
+    wrapper = new AggregationPhaseMapOutputValue(metricValues, metricTypes);
+    return wrapper;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillControllerAPIs.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillControllerAPIs.java
new file mode 100644
index 0000000000..a53680603c
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillControllerAPIs.java
@@ -0,0 +1,251 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.backfill;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.compress.archivers.ArchiveException;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.util.EntityUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.io.Files;
+import org.apache.pinot.common.utils.TarGzCompressionUtils;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+
+/**
+ * Contains APIs which are used for backfilling the pinot segments with star tree index
+ */
+public class BackfillControllerAPIs {
+
+  private static Logger LOGGER = LoggerFactory.getLogger(BackfillControllerAPIs.class);
+  private HttpHost controllerHttpHost;
+  private String tableName;
+
+  private static String SEGMENTS_ENDPOINT = "segments/";
+  private static String TABLES_ENDPOINT = "tables/";
+  private static String METADATA_ENDPOINT = "metadata";
+  private static String UTF_8 = "UTF-8";
+
+  private static String SEGMENT_NAME = "segment.name";
+  private static String SEGMENT_TABLE_NAME = "segment.table.name";
+  private static String SEGMENT_END_TIME = "segment.end.time";
+  private static String SEGMENT_START_TIME = "segment.start.time";
+  private static String SEGMENT_TIME_UNIT = "segment.time.unit";
+
+  BackfillControllerAPIs(String controllerHost, int controllerPort, String tableName) {
+    this.tableName = tableName;
+    LOGGER.info("Connecting to {} {} table {}", controllerHost, controllerPort, tableName);
+    controllerHttpHost = new HttpHost(controllerHost, controllerPort);
+  }
+
+  /**
+   * Downloads a segment from the controller, given the table name and segment name
+   * @param segmentName name of the segment to download
+   * @param hdfsSegmentPath HDFS directory into which the extracted segment is copied
+   * @throws IOException if the download or the HDFS copy fails
+   * @throws ArchiveException if untarring the downloaded segment fails
+   */
+  public void downloadSegment(String segmentName, Path hdfsSegmentPath)
+      throws IOException, ArchiveException {
+
+    FileSystem fs = FileSystem.get(new Configuration());
+    HttpClient controllerClient = new DefaultHttpClient();
+    HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tableName, UTF_8)
+        + "/" + URLEncoder.encode(segmentName, UTF_8));
+    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
+    try {
+      if (res.getStatusLine().getStatusCode() != 200) {
+        throw new IllegalStateException(res.getStatusLine().toString());
+      }
+      LOGGER.info("Fetching segment {}", segmentName);
+      InputStream content = res.getEntity().getContent();
+
+      File tempDir = new File(Files.createTempDir(), "thirdeye_temp");
+      tempDir.mkdir();
+      LOGGER.info("Creating temporary dir for staging segments {}", tempDir);
+      File tempSegmentDir = new File(tempDir, segmentName);
+      File tempSegmentTar = new File(tempDir, segmentName + ThirdEyeConstants.TAR_SUFFIX);
+
+      LOGGER.info("Downloading {} to {}", segmentName, tempSegmentTar);
+      OutputStream out = new FileOutputStream(tempSegmentTar);
+      IOUtils.copy(content, out);
+      out.close();
+      if (!tempSegmentTar.exists()) {
+        throw new IllegalStateException("Download of " + segmentName + " unsuccessful");
+      }
+
+      LOGGER.info("Extracting segment {} to {}", tempSegmentTar, tempDir);
+      TarGzCompressionUtils.unTar(tempSegmentTar, tempDir);
+      File[] files = tempDir.listFiles(new FilenameFilter() {
+
+        @Override
+        public boolean accept(File dir, String name) {
+          return !name.endsWith(ThirdEyeConstants.TAR_SUFFIX) && new File(dir, name).isDirectory();
+        }
+      });
+      if (files.length == 0) {
+        throw new IllegalStateException("Failed to extract " + tempSegmentTar + " to " + tempDir);
+      } else if (!files[0].getName().equals(tempSegmentDir.getName())){
+        LOGGER.info("Moving extracted segment to the segment dir {}", tempSegmentDir);
+        FileUtils.moveDirectory(files[0], tempSegmentDir);
+      }
+      if (!tempSegmentDir.exists()) {
+        throw new IllegalStateException("Failed to move " + files[0] + " to " + tempSegmentDir);
+      }
+
+      LOGGER.info("Copying segment from {} to hdfs {}", tempSegmentDir, hdfsSegmentPath);
+      fs.copyFromLocalFile(new Path(tempSegmentDir.toString()), hdfsSegmentPath);
+      Path hdfsSegmentDir = new Path(hdfsSegmentPath, segmentName);
+      if (!fs.exists(hdfsSegmentDir)) {
+        throw new IllegalStateException("Failed to copy segment " + segmentName + " from local path " + tempSegmentDir
+            + " to hdfs path " + hdfsSegmentPath);
+      }
+    } finally {
+      if (res.getEntity() != null) {
+        EntityUtils.consume(res.getEntity());
+      }
+    }
+    LOGGER.info("Successfully downloaded segment {} to {}", segmentName, hdfsSegmentPath);
+  }
+
+  /**
+   * Given a time range and the list of all segments for a table, returns the segments whose start or end time falls within the range
+   * @param tableName name of the table
+   * @param allSegments all segment names for the table
+   * @param startTime start of the range, in the time unit stored in the segment metadata
+   * @param endTime end of the range, in the time unit stored in the segment metadata
+   * @return segment names whose start or end time falls within the range
+   * @throws Exception if fetching segment metadata fails
+   */
+  public List<String> findSegmentsInRange(String tableName, List<String> allSegments, long startTime, long endTime)
+      throws Exception {
+    List<String> segmentsInRange = new ArrayList<>();
+    for (String segmentName : allSegments) {
+      Map<String, String> metadata = getSegmentMetadata(tableName, segmentName);
+      long segmentStartTime = Long.valueOf(metadata.get(SEGMENT_START_TIME));
+      long segmentEndTime = Long.valueOf(metadata.get(SEGMENT_END_TIME));
+      String segmentTableName = metadata.get(SEGMENT_TABLE_NAME);
+
+      // TODO:
+      // Using time value directly for now, as we only have time unit and not time size in metadata
+      // Once we have time size in metadata, we can accept the time in millis and then convert time from metadata accordingly
+      if (segmentTableName.equals(tableName) && ((segmentStartTime >= startTime && segmentStartTime <= endTime)
+          || (segmentEndTime >= startTime && segmentEndTime <= endTime))) {
+        LOGGER.info("Segment name : {}, Segment start : {}, Segment end : {}, Segment table : {}",
+            segmentName, segmentStartTime, segmentEndTime, segmentTableName);
+        segmentsInRange.add(segmentName);
+      }
+    }
+    return segmentsInRange;
+  }
+
+  /**
+   * Fetches the list of all segment names for a table
+   * @param tableName name of the table
+   * @return list of segment names
+   * @throws IOException if the controller request fails
+   */
+  public List<String> getAllSegments(String tableName) throws IOException {
+    List<String> allSegments = new ArrayList<>();
+
+    HttpClient controllerClient = new DefaultHttpClient();
+    HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tableName, UTF_8));
+    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
+    try {
+      if (res.getStatusLine().getStatusCode() != 200) {
+        throw new IllegalStateException(res.getStatusLine().toString());
+      }
+      InputStream content = res.getEntity().getContent();
+      String response = IOUtils.toString(content);
+      List<String> allSegmentsPaths = getSegmentsFromResponse(response);
+      for (String segment : allSegmentsPaths) {
+        allSegments.add(segment.substring(segment.lastIndexOf("/") + 1));
+      }
+      LOGGER.info("All segments : {}", allSegments);
+    } finally {
+      if (res.getEntity() != null) {
+        EntityUtils.consume(res.getEntity());
+      }
+    }
+    return allSegments;
+  }
+
+  /**
+   * Returns the metadata of a segment, given the segment name and table name
+   * @param tableName - table where the segment resides
+   * @param segmentName - name of the segment
+   * @return map of metadata property names to values
+   * @throws IOException if the controller request fails
+   */
+  public Map<String, String> getSegmentMetadata(String tableName, String segmentName) throws IOException {
+    Map<String, String> metadata = null;
+    HttpClient controllerClient = new DefaultHttpClient();
+    HttpGet req = new HttpGet(TABLES_ENDPOINT + URLEncoder.encode(tableName, UTF_8)
+        + "/" + SEGMENTS_ENDPOINT + URLEncoder.encode(segmentName, UTF_8) + "/" + METADATA_ENDPOINT);
+    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
+    try {
+      if (res.getStatusLine().getStatusCode() != 200) {
+        throw new IllegalStateException(res.getStatusLine().toString());
+      }
+      InputStream content = res.getEntity().getContent();
+      String metadataResponse = IOUtils.toString(content);
+      metadata = getMetadataFromResponse(metadataResponse);
+    } finally {
+      if (res.getEntity() != null) {
+        EntityUtils.consume(res.getEntity());
+      }
+    }
+    return metadata;
+  }
+
+  private List<String> getSegmentsFromResponse(String response) {
+    String[] allSegments = response.replaceAll("\\[|\\]|\"", "").split(",");
+    return Arrays.asList(allSegments);
+  }
+
+  private Map<String, String> getMetadataFromResponse(String response) {
+    Map<String, String> metadata = new HashMap<>();
+    String cleanUpResponse = response.replaceAll("\\[|\\]|\"|\\{|\\}|\\\\", "");
+    String[] allProperties = cleanUpResponse.replace("state:", "").split(",");
+    for (String property : allProperties) {
+      String[] tokens = property.split(":", 2);
+      metadata.put(tokens[0], tokens[1]);
+    }
+    return metadata;
+  }
+
+}
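
For orientation, a minimal sketch of how these APIs chain together when backfilling (imports and exception handling omitted). The constructor is package-private, so this would live in the same package; the host, port, table name, time range and HDFS path are placeholders:

    BackfillControllerAPIs apis = new BackfillControllerAPIs("localhost", 9000, "myTable_OFFLINE");
    List<String> allSegments = apis.getAllSegments("myTable_OFFLINE");
    List<String> segmentsToBackfill =
        apis.findSegmentsInRange("myTable_OFFLINE", allSegments, 430000L, 440000L);
    for (String segmentName : segmentsToBackfill) {
      apis.downloadSegment(segmentName, new Path("/thirdeye/backfill/download"));
    }
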
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseConstants.java
new file mode 100644
index 0000000000..1af5d2c8a1
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseConstants.java
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.backfill;
+
+/**
+ * This class contains the properties to be set for the backfill phase
+ */
+public enum BackfillPhaseConstants {
+
+  BACKFILL_PHASE_CONTROLLER_HOST("backfill.phase.controller.host"),
+  BACKFILL_PHASE_CONTROLLER_PORT("backfill.phase.controller.port"),
+  BACKFILL_PHASE_START_TIME("backfill.phase.start.time"),
+  BACKFILL_PHASE_END_TIME("backfill.phase.end.time"),
+  BACKFILL_PHASE_TABLE_NAME("backfill.phase.table.name"),
+  BACKFILL_PHASE_OUTPUT_PATH("backfill.phase.output.path");
+
+  String name;
+
+  BackfillPhaseConstants(String name) {
+    this.name = name;
+  }
+
+  public String toString() {
+    return name;
+  }
+
+}
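
These keys map one-to-one to entries in the job's properties file; a sketch with placeholder values (host, port, times, table and path are illustrative only):

    backfill.phase.controller.host=localhost
    backfill.phase.controller.port=9000
    backfill.phase.start.time=430000
    backfill.phase.end.time=440000
    backfill.phase.table.name=myTable_OFFLINE
    backfill.phase.output.path=/thirdeye/backfill
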
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseJob.java
new file mode 100644
index 0000000000..0609ec60b4
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseJob.java
@@ -0,0 +1,203 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.backfill;
+
+import static org.apache.pinot.thirdeye.hadoop.backfill.BackfillPhaseConstants.*;
+
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import com.google.common.collect.Lists;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This phase backfills segments which are already present in pinot with star tree and topk information.
+ * The pinot segments are downloaded from the table and converted to avro files.
+ * These avro files are then passed on to the rest of the thirdeye-hadoop segment generation pipeline.
+ */
+public class BackfillPhaseJob extends Configured {
+  private static final Logger LOGGER = LoggerFactory.getLogger(BackfillPhaseJob.class);
+  private static final String DOWNLOAD = "download";
+  private static final String INPUT = "input";
+  private static final String OUTPUT = "output";
+
+  private String name;
+  private Properties props;
+
+  /**
+   * @param name job name
+   * @param props job properties (see BackfillPhaseConstants for the expected keys)
+   */
+  public BackfillPhaseJob(String name, Properties props) {
+    super(new Configuration());
+    getConf().set("mapreduce.job.user.classpath.first", "true");
+    this.name = name;
+    this.props = props;
+  }
+
+  public Job run() throws Exception {
+
+    Job job = Job.getInstance(getConf());
+    job.setJarByClass(BackfillPhaseJob.class);
+    job.setJobName(name);
+
+    FileSystem fs = FileSystem.get(getConf());
+    Configuration configuration = job.getConfiguration();
+
+    LOGGER.info("*******************************************************************************");
+    String controllerHost = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_HOST);
+    String controllerPort = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_PORT);
+    LOGGER.info("Controller Host : {} Controller Port : {}", controllerHost, controllerPort);
+    String segmentStartTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_START_TIME);
+    String segmentEndTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_END_TIME);
+    long startTime = Long.valueOf(segmentStartTime);
+    long endTime = Long.valueOf(segmentEndTime);
+    if (startTime > endTime) {
+      throw new IllegalStateException("Start time cannot be greater than end time");
+    }
+    String tableName = getAndSetConfiguration(configuration, BACKFILL_PHASE_TABLE_NAME);
+    LOGGER.info("Start time : {} End time : {} Table name : {}", segmentStartTime, segmentEndTime, tableName);
+
+    String outputPath = getAndSetConfiguration(configuration, BACKFILL_PHASE_OUTPUT_PATH);
+    LOGGER.info("Output path : {}", outputPath);
+    Path backfillDir = new Path(outputPath);
+    if (fs.exists(backfillDir)) {
+      LOGGER.warn("Found existing output folder {}, deleting it", backfillDir);
+      fs.delete(backfillDir, true);
+    }
+    Path downloadDir = new Path(backfillDir, DOWNLOAD);
+    LOGGER.info("Creating download dir : {}", downloadDir);
+    fs.mkdirs(downloadDir);
+    Path inputDir = new Path(backfillDir, INPUT);
+    LOGGER.info("Creating input dir : {}", inputDir);
+    fs.mkdirs(inputDir);
+    Path outputDir = new Path(backfillDir, OUTPUT);
+    LOGGER.info("Creating output dir : {}", outputDir);
+
+    BackfillControllerAPIs backfillControllerAPIs = new BackfillControllerAPIs(controllerHost,
+        Integer.valueOf(controllerPort), tableName);
+
+    LOGGER.info("Downloading segments in range {} to {}", startTime, endTime);
+    List<String> allSegments = backfillControllerAPIs.getAllSegments(tableName);
+    List<String> segmentsToDownload = backfillControllerAPIs.findSegmentsInRange(tableName, allSegments, startTime, endTime);
+    for (String segmentName : segmentsToDownload) {
+      backfillControllerAPIs.downloadSegment(segmentName, downloadDir);
+    }
+
+    LOGGER.info("Reading downloaded segment input files");
+    List<FileStatus> inputDataFiles = new ArrayList<>();
+    inputDataFiles.addAll(Lists.newArrayList(fs.listStatus(downloadDir)));
+    LOGGER.info("size {}", inputDataFiles.size());
+
+    try {
+      LOGGER.info("Creating input files at {} for segment input files", inputDir);
+      for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
+        FileStatus file = inputDataFiles.get(seqId);
+        String completeFilePath = " " + file.getPath().toString() + " " + seqId;
+        Path newOutPutFile = new Path((inputDir + "/" + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
+        FSDataOutputStream stream = fs.create(newOutPutFile);
+        LOGGER.info("wrote {}", completeFilePath);
+        stream.writeUTF(completeFilePath);
+        stream.flush();
+        stream.close();
+      }
+    } catch (Exception e) {
+      LOGGER.error("Exception while reading input files ", e);
+    }
+
+    job.setMapperClass(BackfillPhaseMapJob.BackfillMapper.class);
+
+    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
+      job.getConfiguration().set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
+    }
+
+    job.setInputFormatClass(TextInputFormat.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+
+    job.setMapOutputKeyClass(LongWritable.class);
+    job.setMapOutputValueClass(Text.class);
+
+    FileInputFormat.addInputPath(job, inputDir);
+    FileOutputFormat.setOutputPath(job, outputDir);
+
+    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
+    job.setMaxReduceAttempts(1);
+    job.setMaxMapAttempts(0);
+    job.setNumReduceTasks(0);
+
+    for (Object key : props.keySet()) {
+      job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
+    }
+
+    job.waitForCompletion(true);
+    if (!job.isSuccessful()) {
+      throw new RuntimeException("Job failed : " + job);
+    }
+
+    LOGGER.info("Cleanup the working directory");
+    LOGGER.info("Deleting the dir: {}", downloadDir);
+    fs.delete(downloadDir, true);
+    LOGGER.info("Deleting the dir: {}", inputDir);
+    fs.delete(inputDir, true);
+    LOGGER.info("Deleting the dir: {}", outputDir);
+    fs.delete(outputDir, true);
+
+    return job;
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+  private String getAndSetConfiguration(Configuration configuration, BackfillPhaseConstants constant) {
+    String value = getAndCheck(constant.toString());
+    configuration.set(constant.toString(), value);
+    return value;
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("usage: config.properties");
+    }
+    Properties props = new Properties();
+    props.load(new FileInputStream(args[0]));
+    BackfillPhaseJob job = new BackfillPhaseJob("backfill_job", props);
+    job.run();
+  }
+
+}
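
A minimal sketch of driving this phase from code, mirroring the main method above (exception handling omitted); the properties file name is a placeholder:

    Properties props = new Properties();
    try (FileInputStream fis = new FileInputStream("backfill.properties")) {
      props.load(fis);
    }
    Job job = new BackfillPhaseJob("backfill_job", props).run();
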
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java
new file mode 100644
index 0000000000..2dde519f35
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/backfill/BackfillPhaseMapJob.java
@@ -0,0 +1,164 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.backfill;
+
+import org.apache.pinot.core.data.GenericRow;
+import org.apache.pinot.core.data.readers.PinotSegmentRecordReader;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+import org.apache.pinot.thirdeye.hadoop.util.ThirdeyeAvroUtils;
+import java.io.File;
+import java.io.IOException;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.pinot.thirdeye.hadoop.backfill.BackfillPhaseConstants.*;
+
+/**
+ * Mapper class for Backfill job, which converts a pinot segment to avro files
+ */
+public class BackfillPhaseMapJob {
+
+  public static class BackfillMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
+    private static Logger LOGGER = LoggerFactory.getLogger(BackfillPhaseMapJob.class);
+
+    private Configuration properties;
+
+    private String inputPath;
+    private String outputPath;
+    private String currentDiskWorkDir;
+    private FileSystem fs;
+
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+
+      currentDiskWorkDir = "pinot_hadoop_tmp";
+      new File(currentDiskWorkDir).mkdir();
+
+      LOGGER.info("*********************************************************************");
+      LOGGER.info("Configurations : {}", context.getConfiguration().toString());
+      LOGGER.info("Current DISK working dir : {}", new File(currentDiskWorkDir).getAbsolutePath());
+      LOGGER.info("*********************************************************************");
+
+      properties = context.getConfiguration();
+      fs = FileSystem.get(new Configuration());
+
+      outputPath = properties.get(BACKFILL_PHASE_OUTPUT_PATH.toString());
+    }
+
+    @Override
+    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+
+      String line = value.toString();
+      String[] lineSplits = line.split(" ");
+
+      LOGGER.info("*********************************************************************");
+      LOGGER.info("mapper input : {}", value);
+      LOGGER.info("Path to output : {}", outputPath);
+      LOGGER.info("num tokens : {}", lineSplits.length);
+
+      for (String split : lineSplits) {
+        LOGGER.info("Command line : {}", split);
+      }
+      if (lineSplits.length != 3) {
+        throw new RuntimeException("Input to the mapper is malformed");
+      }
+      inputPath = lineSplits[1].trim();
+
+      LOGGER.info("input data file path : {}", inputPath);
+      LOGGER.info("*********************************************************************");
+
+      try {
+        createAvro(inputPath);
+        LOGGER.info("Finished avro creation job successfully");
+      } catch (Exception e) {
+        LOGGER.error("Got exceptions during creating avro!", e);
+      }
+      LOGGER.info("Finished the job successfully!");
+    }
+
+    private void createAvro(String dataFilePath) throws Exception {
+
+      Path hdfsDataPath = new Path(dataFilePath);
+      File dataPath = new File(currentDiskWorkDir, "data");
+      if (dataPath.exists()) {
+        dataPath.delete();
+      }
+      dataPath.mkdir();
+      LOGGER.info("Creating temporary data dir {}", dataPath);
+
+      final File avroPath = new File(currentDiskWorkDir, "avro");
+      if (avroPath.exists()) {
+        avroPath.delete();
+      }
+      avroPath.mkdir();
+      LOGGER.info("Creating temporary avro dir {}", avroPath);
+
+      String segmentName = hdfsDataPath.getName();
+      final Path localFilePath = new Path(dataPath + "/" + segmentName);
+      fs.copyToLocalFile(hdfsDataPath, localFilePath);
+      LOGGER.info("Copying segment {} from {} to local {}", segmentName, hdfsDataPath, localFilePath);
+      File segmentIndexDir = new File(localFilePath.toString());
+      if (!segmentIndexDir.exists()) {
+        throw new IllegalStateException("Failed to copy " + hdfsDataPath + " to " + localFilePath);
+      }
+
+      LOGGER.info("Initializing PinotSegmentRecordReader with segment index dir {}", segmentIndexDir);
+      PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(segmentIndexDir);
+      LOGGER.info("Schema {}", pinotSegmentRecordReader.getSchema());
+
+      Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSegmentRecordReader.getSchema());
+      GenericDatumWriter<GenericRecord> datum = new GenericDatumWriter<GenericRecord>(avroSchema);
+      DataFileWriter<GenericRecord> recordWriter = new DataFileWriter<GenericRecord>(datum);
+      File localAvroFile = new File(avroPath, segmentName + ThirdEyeConstants.AVRO_SUFFIX);
+      recordWriter.create(avroSchema, localAvroFile);
+
+      LOGGER.info("Converting pinot segment to avro at {}", localAvroFile);
+      while (pinotSegmentRecordReader.hasNext()) {
+        GenericRecord outputRecord = new Record(avroSchema);
+        GenericRow row = pinotSegmentRecordReader.next();
+        for (String fieldName : row.getFieldNames()) {
+          outputRecord.put(fieldName, row.getValue(fieldName));
+        }
+        recordWriter.append(outputRecord);
+      }
+      LOGGER.info("Writing to avro file at {}", localAvroFile);
+      recordWriter.close();
+      if (!localAvroFile.exists()) {
+        LOGGER.error("Failed to write avro file to {}", localAvroFile);
+      }
+      pinotSegmentRecordReader.close();
+
+      LOGGER.info("Copying avro file from {} to hdfs at {}", localAvroFile, outputPath);
+      fs.copyFromLocalFile(true, true, new Path(localAvroFile.toString()), new Path(outputPath));
+      if (!fs.exists(new Path(outputPath))) {
+        throw new IllegalStateException("Failed to copy avro file to hdfs at " + outputPath );
+      }
+      LOGGER.info("Successfully copied {} to {}", localAvroFile, outputPath);
+    }
+  }
+}
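
The lineSplits.length check in the mapper above mirrors the input files written by BackfillPhaseJob, each of which holds a single string of the form " " + segmentPath + " " + seqId. A small illustration (the HDFS path is made up):

    String line = " hdfs://namenode/thirdeye/backfill/download/mySegment 0";
    String[] lineSplits = line.split(" ");
    // lineSplits.length == 3 (leading empty token, segment path, sequence id);
    // lineSplits[1] is the segment path handed to createAvro()
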
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/DimensionSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/DimensionSpec.java
new file mode 100644
index 0000000000..49d3671dd4
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/DimensionSpec.java
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import java.util.Objects;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Class for representing a dimension spec
+ * @param name - dimension name
+ * @param dimensionType - dimension data type
+ */
+public class DimensionSpec {
+  private String name;
+  private DimensionType dimensionType;
+
+  public DimensionSpec() {
+  }
+
+
+  public DimensionSpec(String name, DimensionType dimensionType) {
+    this.name = name;
+    this.dimensionType = dimensionType;
+  }
+
+  @JsonProperty
+  public String getName() {
+    return name;
+  }
+
+  @JsonProperty
+  public DimensionType getDimensionType() {
+    return dimensionType;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof DimensionSpec)) {
+      return false;
+    }
+    DimensionSpec d = (DimensionSpec) o;
+
+    return Objects.equals(d.getName(), name) && Objects.equals(d.getDimensionType(), dimensionType);
+  }
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/DimensionType.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/DimensionType.java
new file mode 100644
index 0000000000..8efd5dc451
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/DimensionType.java
@@ -0,0 +1,205 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+/**
+ * Represents the various data types supported for a dimension<br/>
+ * Currently we support INT, SHORT, LONG, FLOAT, DOUBLE, STRING
+ */
+public enum DimensionType {
+  INT {
+    @Override
+    public Object getValueFromString(String strVal) {
+      return Integer.valueOf(strVal);
+    }
+
+    @Override
+    public Object getDefaultNullvalue() {
+      return ThirdEyeConstants.EMPTY_INT;
+    }
+
+    @Override
+    public Object getDefaultOtherValue() {
+      return ThirdEyeConstants.EMPTY_INT;
+    }
+  },
+  SHORT {
+    @Override
+    public Object getValueFromString(String strVal) {
+      return Short.valueOf(strVal);
+    }
+
+    @Override
+    public Object getDefaultNullvalue() {
+      return ThirdEyeConstants.EMPTY_SHORT;
+    }
+
+    @Override
+    public Object getDefaultOtherValue() {
+      return ThirdEyeConstants.EMPTY_SHORT;
+    }
+  },
+  LONG {
+    @Override
+    public Object getValueFromString(String strVal) {
+      return Long.valueOf(strVal);
+    }
+
+    @Override
+    public Object getDefaultNullvalue() {
+      return ThirdEyeConstants.EMPTY_LONG;
+    }
+
+    @Override
+    public Object getDefaultOtherValue() {
+      return ThirdEyeConstants.EMPTY_LONG;
+    }
+  },
+  FLOAT {
+    @Override
+    public Object getValueFromString(String strVal) {
+      return Float.valueOf(strVal);
+    }
+
+    @Override
+    public Object getDefaultNullvalue() {
+      return ThirdEyeConstants.EMPTY_FLOAT;
+    }
+
+    @Override
+    public Object getDefaultOtherValue() {
+      return ThirdEyeConstants.EMPTY_FLOAT;
+    }
+  },
+  DOUBLE {
+    @Override
+    public Object getValueFromString(String strVal) {
+      return Double.valueOf(strVal);
+    }
+
+    @Override
+    public Object getDefaultNullvalue() {
+      return ThirdEyeConstants.EMPTY_DOUBLE;
+    }
+
+    @Override
+    public Object getDefaultOtherValue() {
+      return ThirdEyeConstants.EMPTY_DOUBLE;
+    }
+  },
+  STRING {
+    @Override
+    public Object getValueFromString(String strVal) {
+      return strVal;
+    }
+
+    @Override
+    public Object getDefaultNullvalue() {
+      return ThirdEyeConstants.EMPTY_STRING;
+    }
+
+    @Override
+    public Object getDefaultOtherValue() {
+      return ThirdEyeConstants.OTHER;
+    }
+  };
+
+
+  public abstract Object getValueFromString(String strVal);
+
+  public abstract Object getDefaultNullvalue();
+
+  public abstract Object getDefaultOtherValue();
+
+
+  /**
+   * Writes the dimension value to a data output stream
+   * @param dos DataOutputStream to write to
+   * @param dimensionValue dimension value to serialize
+   * @param dimensionType type of the dimension value
+   * @throws IOException if writing to the stream fails
+   */
+  public static void writeDimensionValueToOutputStream(DataOutputStream dos, Object dimensionValue,
+      DimensionType dimensionType) throws IOException {
+    switch (dimensionType) {
+    case DOUBLE:
+      dos.writeDouble((double) dimensionValue);
+      break;
+    case FLOAT:
+      dos.writeFloat((float) dimensionValue);
+      break;
+    case INT:
+      dos.writeInt((int) dimensionValue);
+      break;
+    case LONG:
+      dos.writeLong((long) dimensionValue);
+      break;
+    case SHORT:
+      dos.writeShort((short) dimensionValue);
+      break;
+    case STRING:
+      String stringVal = (String) dimensionValue;
+      byte[] bytes = stringVal.getBytes();
+      dos.writeInt(bytes.length);
+      dos.write(bytes);
+      break;
+    default:
+      throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
+    }
+  }
+
+  /**
+   * Reads the dimension value from a given data input stream
+   * @param dis DataInputStream to read from
+   * @param dimensionType type of the dimension value
+   * @return the deserialized dimension value
+   * @throws IOException if reading from the stream fails
+   */
+  public static Object readDimensionValueFromDataInputStream(DataInputStream dis, DimensionType dimensionType) throws IOException {
+    Object dimensionValue = null;
+    switch (dimensionType) {
+    case DOUBLE:
+      dimensionValue = dis.readDouble();
+      break;
+    case FLOAT:
+      dimensionValue = dis.readFloat();
+      break;
+    case INT:
+      dimensionValue = dis.readInt();
+      break;
+    case SHORT:
+      dimensionValue = dis.readShort();
+      break;
+    case LONG:
+      dimensionValue = dis.readLong();
+      break;
+    case STRING:
+      int length = dis.readInt();
+      byte[] bytes = new byte[length];
+      dis.read(bytes);
+      dimensionValue = new String(bytes);
+      break;
+    default:
+      throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
+    }
+    return dimensionValue;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/MetricSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/MetricSpec.java
new file mode 100644
index 0000000000..92c059b5a9
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/MetricSpec.java
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Class for representing metric specs
+ * @param name - metric name
+ * @param type - metric type
+ */
+public class MetricSpec {
+  private String name;
+  private MetricType type;
+
+  public MetricSpec() {
+  }
+
+  public MetricSpec(String name, MetricType type) {
+    this.name = name;
+    this.type = type;
+  }
+
+  @JsonProperty
+  public String getName() {
+    return name;
+  }
+
+  @JsonProperty
+  public MetricType getType() {
+    return type;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof MetricSpec)) {
+      return false;
+    }
+
+    MetricSpec m = (MetricSpec) o;
+
+    return name.equals(m.getName()) && type.equals(m.getType());
+  }
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/MetricType.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/MetricType.java
new file mode 100644
index 0000000000..96d6ad7727
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/MetricType.java
@@ -0,0 +1,175 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+/**
+ * Represents the various data types supported for a metric<br/>
+ * Currently we support INT, SHORT, LONG, FLOAT, DOUBLE
+ */
+public enum MetricType {
+
+  INT {
+    public Number toNumber(String s) {
+      return Integer.parseInt(s);
+    }
+
+    public int byteSize() {
+      return 4;
+    }
+
+    @Override
+    public Number getDefaultNullValue() {
+      return ThirdEyeConstants.EMPTY_INT;
+    }
+
+  },
+  SHORT {
+    public Number toNumber(String s) {
+      return Short.parseShort(s);
+    }
+
+    public int byteSize() {
+      return 2;
+
+    }
+
+    @Override
+    public Number getDefaultNullValue() {
+      return ThirdEyeConstants.EMPTY_SHORT;
+    }
+
+  },
+  LONG {
+    public Number toNumber(String s) {
+      return Long.parseLong(s);
+    }
+
+    public int byteSize() {
+      return 8;
+
+    }
+
+    @Override
+    public Number getDefaultNullValue() {
+      return ThirdEyeConstants.EMPTY_LONG;
+    }
+
+  },
+  FLOAT {
+    public Number toNumber(String s) {
+      return Float.parseFloat(s);
+    }
+
+    public int byteSize() {
+      return 4;
+
+    }
+
+    @Override
+    public Number getDefaultNullValue() {
+      return ThirdEyeConstants.EMPTY_FLOAT;
+    }
+
+  },
+  DOUBLE {
+    public Number toNumber(String s) {
+      return Double.parseDouble(s);
+    }
+
+    public int byteSize() {
+      return 8;
+    }
+
+    @Override
+    public Number getDefaultNullValue() {
+      return ThirdEyeConstants.EMPTY_DOUBLE;
+    }
+  };
+
+  public Number toNumber(String s) {
+    throw new AbstractMethodError();
+  }
+
+  public int byteSize() {
+    throw new AbstractMethodError();
+  }
+
+  public abstract Number getDefaultNullValue();
+
+  /**
+   * Writes a metric value to a data output stream
+   * @param dos DataOutputStream to write to
+   * @param number metric value to serialize
+   * @param metricType type of the metric value
+   * @throws IOException if writing to the stream fails
+   */
+  public static void writeMetricValueToDataOutputStream(DataOutputStream dos, Number number, MetricType metricType) throws IOException {
+    switch (metricType) {
+    case SHORT:
+      dos.writeShort(number.intValue());
+      break;
+    case LONG:
+      dos.writeLong(number.longValue());
+      break;
+    case INT:
+      dos.writeInt(number.intValue());
+      break;
+    case FLOAT:
+      dos.writeFloat(number.floatValue());
+      break;
+    case DOUBLE:
+      dos.writeDouble(number.doubleValue());
+      break;
+    default:
+      throw new IllegalArgumentException("Unsupported metricType " + metricType);
+    }
+  }
+
+  /**
+   * Reads a metric value from a data input stream
+   * @param dis DataInputStream to read from
+   * @param metricType type of the metric value
+   * @return the deserialized metric value
+   * @throws IOException if reading from the stream fails
+   */
+  public static Number readMetricValueFromDataInputStream(DataInputStream dis, MetricType metricType) throws IOException {
+    Number metricValue = null;
+    switch (metricType) {
+    case SHORT:
+      metricValue = dis.readShort();
+      break;
+    case LONG:
+      metricValue = dis.readLong();
+      break;
+    case INT:
+      metricValue = dis.readInt();
+      break;
+    case FLOAT:
+      metricValue = dis.readFloat();
+      break;
+    case DOUBLE:
+      metricValue = dis.readDouble();
+      break;
+    default:
+      throw new IllegalArgumentException("Unsupported metricType " + metricType);
+    }
+    return metricValue;
+  }
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/SplitSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/SplitSpec.java
new file mode 100644
index 0000000000..bdbe3192d3
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/SplitSpec.java
@@ -0,0 +1,48 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.List;
+
+/**
+ * Class for representing split spec
+ * @param threshold - threshold after which to stop splitting on a node in star tree
+ * @param order - order in which dimensions should be chosen to split in star tree creation
+ */
+public class SplitSpec {
+  private int threshold = 1000;
+  private List<String> order;
+
+  public SplitSpec() {
+  }
+
+  public SplitSpec(int threshold, List<String> order) {
+    this.threshold = threshold;
+    this.order = order;
+  }
+
+  @JsonProperty
+  public int getThreshold() {
+    return threshold;
+  }
+
+  @JsonProperty
+  public List<String> getOrder() {
+    return order;
+  }
+}
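
The spec classes above (DimensionSpec, MetricSpec, SplitSpec) are serialized by ThirdEyeConfig, defined next, through a Jackson YAML ObjectMapper, so an encoded config takes roughly this shape; the collection, dimension and metric names are placeholders and the exact formatting depends on Jackson's pretty printer:

    collection: myCollection
    dimensions:
    - name: countryCode
      dimensionType: STRING
    metrics:
    - name: pageViews
      type: LONG
    split:
      threshold: 1000
      order:
      - countryCode
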
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfig.java
new file mode 100644
index 0000000000..96f4827c65
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfig.java
@@ -0,0 +1,479 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.lang.StringUtils;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.google.common.collect.Lists;
+import org.apache.pinot.common.data.TimeGranularitySpec.TimeFormat;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionSpec;
+import org.apache.pinot.thirdeye.hadoop.config.MetricSpec;
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+import org.apache.pinot.thirdeye.hadoop.config.SplitSpec;
+import org.apache.pinot.thirdeye.hadoop.config.TimeGranularity;
+import org.apache.pinot.thirdeye.hadoop.config.TimeSpec;
+import org.apache.pinot.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
+import org.apache.pinot.thirdeye.hadoop.config.TopkWhitelistSpec;
+
+/**
+ * This class represents the configs required by the thirdeye-hadoop jobs
+ * @param collection - name of the pinot table
+ * @param dimensions - list of dimensionSpecs for dimensions
+ * @param metrics - list of metricSpecs for metrics
+ * @param time - time spec
+ * @param topKWhitelist - metric threshold, topk and whitelist spec
+ * @param split - split spec
+ */
+public final class ThirdEyeConfig {
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory());
+  private static final String FIELD_SEPARATOR = ",";
+  private static final String CONFIG_JOINER = ".";
+  private static final String DEFAULT_TIME_TYPE = "HOURS";
+  private static final String DEFAULT_TIME_SIZE = "1";
+  private static final String DEFAULT_TIME_FORMAT = TimeFormat.EPOCH.toString();
+
+  private String collection;
+  private List<DimensionSpec> dimensions;
+  private List<MetricSpec> metrics;
+  private TimeSpec inputTime = new TimeSpec();
+  private TimeSpec time = new TimeSpec();
+  private TopkWhitelistSpec topKWhitelist = new TopkWhitelistSpec();
+  private SplitSpec split = new SplitSpec();
+
+  public ThirdEyeConfig() {
+  }
+
+  public ThirdEyeConfig(String collection, List<DimensionSpec> dimensions,
+      List<MetricSpec> metrics, TimeSpec inputTime, TimeSpec time, TopkWhitelistSpec topKWhitelist, SplitSpec split) {
+    this.collection = collection;
+    this.dimensions = dimensions;
+    this.metrics = metrics;
+    this.inputTime = inputTime;
+    this.time = time;
+    this.topKWhitelist = topKWhitelist;
+    this.split = split;
+  }
+
+  public String getCollection() {
+    return collection;
+  }
+
+  public List<DimensionSpec> getDimensions() {
+    return dimensions;
+  }
+
+  @JsonIgnore
+  public List<String> getDimensionNames() {
+    List<String> results = new ArrayList<>(dimensions.size());
+    for (DimensionSpec dimensionSpec : dimensions) {
+      results.add(dimensionSpec.getName());
+    }
+    return results;
+  }
+
+  public List<MetricSpec> getMetrics() {
+    return metrics;
+  }
+
+  @JsonIgnore
+  public List<String> getMetricNames() {
+    List<String> results = new ArrayList<>(metrics.size());
+    for (MetricSpec metricSpec : metrics) {
+      results.add(metricSpec.getName());
+    }
+    return results;
+  }
+
+  public TimeSpec getInputTime() {
+    return inputTime;
+  }
+
+  public TimeSpec getTime() {
+    return time;
+  }
+
+  public TopkWhitelistSpec getTopKWhitelist() {
+    return topKWhitelist;
+  }
+
+  /**
+   * Returns a set of all dimensions which have either topk or whitelist config
+   * @return set of dimension names to transform
+   */
+  @JsonIgnore
+  public Set<String> getTransformDimensions() {
+    Set<String> transformDimensions = new HashSet<>();
+
+    if (topKWhitelist != null) {
+      List<TopKDimensionToMetricsSpec> topk = topKWhitelist.getTopKDimensionToMetricsSpec();
+      if (topk != null) {
+        for (TopKDimensionToMetricsSpec spec : topk) {
+          transformDimensions.add(spec.getDimensionName());
+        }
+      }
+    }
+    return transformDimensions;
+  }
+
+  public SplitSpec getSplit() {
+    return split;
+  }
+
+  public String encode() throws IOException {
+    return OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(this);
+  }
+
+  public static class Builder {
+    private String collection;
+    private List<DimensionSpec> dimensions;
+    private List<MetricSpec> metrics;
+    private TimeSpec inputTime = new TimeSpec();
+    private TimeSpec time = new TimeSpec();
+    private TopkWhitelistSpec topKWhitelist = new TopkWhitelistSpec();
+    private SplitSpec split = new SplitSpec();
+
+    public String getCollection() {
+      return collection;
+    }
+
+    public Builder setCollection(String collection) {
+      this.collection = collection;
+      return this;
+    }
+
+    public List<DimensionSpec> getDimensions() {
+      return dimensions;
+    }
+
+    public Builder setDimensions(List<DimensionSpec> dimensions) {
+      this.dimensions = dimensions;
+      return this;
+    }
+
+    public List<MetricSpec> getMetrics() {
+      return metrics;
+    }
+
+    public Builder setMetrics(List<MetricSpec> metrics) {
+      this.metrics = metrics;
+      return this;
+    }
+
+    public TimeSpec getInputTime() {
+      return inputTime;
+    }
+
+    public TimeSpec getTime() {
+      return time;
+    }
+
+    public Builder setTime(TimeSpec time) {
+      this.time = time;
+      return this;
+    }
+
+    public TopkWhitelistSpec getTopKWhitelist() {
+      return topKWhitelist;
+    }
+
+    public Builder setTopKWhitelist(TopkWhitelistSpec topKWhitelist) {
+      this.topKWhitelist = topKWhitelist;
+      return this;
+    }
+
+    public SplitSpec getSplit() {
+      return split;
+    }
+
+    public Builder setSplit(SplitSpec split) {
+      this.split = split;
+      return this;
+    }
+
+    public ThirdEyeConfig build() throws Exception {
+      if (collection == null) {
+        throw new IllegalArgumentException("Must provide collection");
+      }
+
+      if (dimensions == null || dimensions.isEmpty()) {
+        throw new IllegalArgumentException("Must provide dimension names");
+      }
+
+      if (metrics == null || metrics.isEmpty()) {
+        throw new IllegalArgumentException("Must provide metric specs");
+      }
+
+      return new ThirdEyeConfig(collection, dimensions, metrics, inputTime, time, topKWhitelist, split);
+    }
+  }
+
+  public static ThirdEyeConfig decode(InputStream inputStream) throws IOException {
+    return OBJECT_MAPPER.readValue(inputStream, ThirdEyeConfig.class);
+  }
+
+  /**
+   * Creates a ThirdEyeConfig object from the Properties object
+   * @param props properties with the thirdeye job configuration
+   * @return the constructed ThirdEyeConfig
+   */
+  public static ThirdEyeConfig fromProperties(Properties props) {
+
+    String collection = getCollectionFromProperties(props);
+    List<DimensionSpec> dimensions = getDimensionFromProperties(props);
+    List<MetricSpec> metrics = getMetricsFromProperties(props);
+    TimeSpec inputTime = getInputTimeFromProperties(props);
+    TimeSpec time = getTimeFromProperties(props);
+    SplitSpec split = getSplitFromProperties(props);
+    TopkWhitelistSpec topKWhitelist = getTopKWhitelistFromProperties(props);
+    ThirdEyeConfig thirdeyeConfig = new ThirdEyeConfig(collection, dimensions, metrics, inputTime, time, topKWhitelist, split);
+    return thirdeyeConfig;
+  }
+
+  private static TopkWhitelistSpec getTopKWhitelistFromProperties(Properties props) {
+    TopkWhitelistSpec topKWhitelist = null;
+
+    Map<String, Double> threshold = getThresholdFromProperties(props);
+    List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = getTopKDimensionToMetricsSpecFromProperties(props);
+    Map<String, List<String>> whitelist = getWhitelistFromProperties(props);
+    Map<String, String> nonWhitelistValue = getNonWhitelistValueFromProperties(props);
+
+    if (threshold != null || topKDimensionToMetricsSpec != null || whitelist != null) {
+      topKWhitelist = new TopkWhitelistSpec();
+      topKWhitelist.setThreshold(threshold);
+      topKWhitelist.setTopKDimensionToMetricsSpec(topKDimensionToMetricsSpec);
+      topKWhitelist.setWhitelist(whitelist);
+      topKWhitelist.setNonWhitelistValue(nonWhitelistValue);
+    }
+    return topKWhitelist;
+  }
+
+  /**
+   * Creates a map of dimension name to the value that should be used for non-whitelisted ("other") values
+   * @param props properties with the thirdeye job configuration
+   * @return map from whitelist dimension name to its non-whitelist replacement value, or null if no whitelist dimensions are configured
+   */
+  private static Map<String, String> getNonWhitelistValueFromProperties(Properties props) {
+    Map<String, String> dimensionToNonWhitelistValueMap = null;
+
+    // create dimension to type map
+    List<DimensionSpec> dimensions = getDimensionFromProperties(props);
+    Map<String, DimensionType> dimensionToType = new HashMap<>();
+    for (int i = 0; i < dimensions.size(); i ++) {
+      DimensionSpec spec = dimensions.get(i);
+      dimensionToType.put(spec.getName(), spec.getDimensionType());
+    }
+
+    // dimensions with whitelist
+    String whitelistDimensionsStr = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), null);
+    List<String> whitelistDimensions = new ArrayList<>();
+    if (StringUtils.isNotBlank(whitelistDimensionsStr)) {
+      dimensionToNonWhitelistValueMap = new HashMap<>();
+      whitelistDimensions.addAll(Lists.newArrayList(whitelistDimensionsStr.split(FIELD_SEPARATOR)));
+    }
+
+    for (String whitelistDimension : whitelistDimensions) {
+      String nonWhitelistValue = getAndCheck(props,
+          ThirdEyeConfigProperties.THIRDEYE_NONWHITELIST_VALUE_DIMENSION.toString() + CONFIG_JOINER + whitelistDimension, null);
+      if (StringUtils.isNotBlank(nonWhitelistValue)) {
+        dimensionToNonWhitelistValueMap.put(whitelistDimension, nonWhitelistValue);
+      } else {
+        dimensionToNonWhitelistValueMap.put(whitelistDimension, String.valueOf(dimensionToType.get(whitelistDimension).getDefaultOtherValue()));
+      }
+    }
+    return dimensionToNonWhitelistValueMap;
+  }
+
+
+  private static Map<String, List<String>> getWhitelistFromProperties(Properties props) {
+    // create dimension to type map
+    List<DimensionSpec> dimensions = getDimensionFromProperties(props);
+    Map<String, DimensionType> dimensionToType = new HashMap<>();
+    Map<String, Integer> dimensionToIndex = new HashMap<>();
+    for (int i = 0; i < dimensions.size(); i ++) {
+      DimensionSpec spec = dimensions.get(i);
+      dimensionToType.put(spec.getName(), spec.getDimensionType());
+      dimensionToIndex.put(spec.getName(), i);
+    }
+
+    Map<String, List<String>> whitelist = null;
+    String whitelistDimensionsStr = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), null);
+    if (whitelistDimensionsStr != null && whitelistDimensionsStr.split(FIELD_SEPARATOR).length > 0) {
+      whitelist = new HashMap<>();
+      for (String dimension : whitelistDimensionsStr.split(FIELD_SEPARATOR)) {
+        String whitelistValuesStr = getAndCheck(props,
+            ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + CONFIG_JOINER + dimension);
+        String[] whitelistValues = whitelistValuesStr.split(FIELD_SEPARATOR);
+        List<String> whitelistValuesList = Lists.newArrayList(whitelistValues);
+        whitelist.put(dimension, whitelistValuesList);
+      }
+    }
+    return whitelist;
+  }
+
+  private static List<TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpecFromProperties(Properties props) {
+    List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = null;
+    String topKDimensionNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), null);
+    if (StringUtils.isNotEmpty(topKDimensionNames) && topKDimensionNames.split(FIELD_SEPARATOR).length > 0) {
+      topKDimensionToMetricsSpec = new ArrayList<>();
+      for (String dimension : topKDimensionNames.split(FIELD_SEPARATOR)) {
+        String[] topKDimensionMetrics = getAndCheck(props,
+            ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + CONFIG_JOINER + dimension)
+            .split(FIELD_SEPARATOR);
+        String[] topKDimensionKValues = getAndCheck(props,
+            ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + CONFIG_JOINER + dimension)
+            .split(FIELD_SEPARATOR);
+        if (topKDimensionMetrics.length != topKDimensionKValues.length) {
+          throw new IllegalStateException("Number of topk metric names and kvalues should be same for a dimension");
+        }
+        Map<String, Integer> topk = new HashMap<>();
+        for (int i = 0; i < topKDimensionMetrics.length; i++) {
+          topk.put(topKDimensionMetrics[i], Integer.parseInt(topKDimensionKValues[i]));
+        }
+        topKDimensionToMetricsSpec.add(new TopKDimensionToMetricsSpec(dimension, topk));
+      }
+    }
+    return topKDimensionToMetricsSpec;
+  }
+
+  private static Map<String, Double> getThresholdFromProperties(Properties props) {
+    Map<String, Double> threshold = null;
+    String thresholdMetricNames = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString(), null);
+    String metricThresholdValues = getAndCheck(props, ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), null);
+    if (thresholdMetricNames != null && metricThresholdValues != null) {
+      String[] thresholdMetrics = thresholdMetricNames.split(FIELD_SEPARATOR);
+      String[] thresholdValues = metricThresholdValues.split(FIELD_SEPARATOR);
+      if (thresholdMetrics.length != thresholdValues.length) {
+        throw new IllegalStateException("Number of threshold metric names should be same as threshold values");
+      }
+      threshold = new HashMap<>();
+      for (int i = 0; i < thresholdMetrics.length; i++) {
+        threshold.put(thresholdMetrics[i], Double.parseDouble(thresholdValues[i]));
+      }
+    }
+    return threshold;
+  }
+
+  private static SplitSpec getSplitFromProperties(Properties props) {
+    SplitSpec split = null;
+    String splitThreshold = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_SPLIT_THRESHOLD.toString(), null);
+    if (splitThreshold != null) {
+      String splitOrder = getAndCheck(props,
+          ThirdEyeConfigProperties.THIRDEYE_SPLIT_ORDER.toString(), null);
+      List<String> splitOrderList = null;
+      if (splitOrder != null) {
+        splitOrderList = Arrays.asList(splitOrder.split(FIELD_SEPARATOR));
+      }
+      split = new SplitSpec(Integer.parseInt(splitThreshold), splitOrderList);
+    }
+    return split;
+  }
+
+  private static TimeSpec getTimeFromProperties(Properties props) {
+    String timeColumnName = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString());
+    String timeColumnType = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_TYPE.toString(), DEFAULT_TIME_TYPE);
+    String timeColumnSize = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_SIZE.toString(), DEFAULT_TIME_SIZE);
+    TimeGranularity timeGranularity = new TimeGranularity(Integer.parseInt(timeColumnSize), TimeUnit.valueOf(timeColumnType));
+    String timeFormat = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_FORMAT.toString(), DEFAULT_TIME_FORMAT);
+    TimeSpec time = new TimeSpec(timeColumnName, timeGranularity, timeFormat);
+    return time;
+  }
+
+
+  private static TimeSpec getInputTimeFromProperties(Properties props) {
+    TimeSpec inputTime = null;
+    String timeColumnName = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString());
+    String timeColumnType = getAndCheck(props,
+          ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_TYPE.toString(), null);
+    String timeColumnSize = getAndCheck(props,
+          ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_SIZE.toString(), null);
+    String timeFormat = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_FORMAT.toString(), DEFAULT_TIME_FORMAT);
+    if (timeColumnType != null && timeColumnSize != null) {
+      TimeGranularity timeGranularity = new TimeGranularity(Integer.parseInt(timeColumnSize), TimeUnit.valueOf(timeColumnType));
+      inputTime = new TimeSpec(timeColumnName, timeGranularity, timeFormat);
+    }
+    return inputTime;
+  }
+
+  private static List<MetricSpec> getMetricsFromProperties(Properties props) {
+    List<MetricSpec> metrics = new ArrayList<>();
+    String[] metricNames = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()).split(FIELD_SEPARATOR);
+    String[] metricTypes = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()).split(FIELD_SEPARATOR);
+    if (metricNames.length != metricTypes.length) {
+      throw new IllegalStateException("Number of metric names provided "
+          + "should be same as number of metric types");
+    }
+    for (int i = 0; i < metricNames.length; i++) {
+      metrics.add(new MetricSpec(metricNames[i], MetricType.valueOf(metricTypes[i])));
+    }
+    return metrics;
+  }
+
+  private static List<DimensionSpec> getDimensionFromProperties(Properties props) {
+    List<DimensionSpec> dimensions = new ArrayList<>();
+    String[] dimensionNames = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()).split(FIELD_SEPARATOR);
+    String[] dimensionTypes = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString()).split(FIELD_SEPARATOR);
+    for (int i = 0; i < dimensionNames.length; i++) {
+      dimensions.add(new DimensionSpec(dimensionNames[i], DimensionType.valueOf(dimensionTypes[i])));
+    }
+    return dimensions;
+  }
+
+  private static String getCollectionFromProperties(Properties props) {
+    String collection = getAndCheck(props,
+        ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString());
+    return collection;
+  }
+
+  private static String getAndCheck(Properties props, String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+  private static String getAndCheck(Properties props, String propName, String defaultValue) {
+    String propValue = props.getProperty(propName, defaultValue);
+    return propValue;
+  }
+
+}
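
For context, ThirdEyeConfig.fromProperties above is driven entirely by the thirdeye.* property keys defined in ThirdEyeConfigProperties (the next file in this diff). A minimal usage sketch, with hypothetical table, dimension, and metric names:

    import java.util.Properties;

    import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;

    public class ThirdEyeConfigFromPropertiesExample {
      public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("thirdeye.table.name", "pageViews");            // hypothetical table
        props.setProperty("thirdeye.dimension.names", "country,browser");
        props.setProperty("thirdeye.dimension.types", "STRING,STRING");
        props.setProperty("thirdeye.metric.names", "views");
        props.setProperty("thirdeye.metric.types", "LONG");
        props.setProperty("thirdeye.timecolumn.name", "hoursSinceEpoch");
        props.setProperty("thirdeye.timecolumn.type", "HOURS");
        props.setProperty("thirdeye.timecolumn.size", "1");

        ThirdEyeConfig config = ThirdEyeConfig.fromProperties(props);
        System.out.println(config.getCollection());                       // pageViews
      }
    }

The optional keys (topk, whitelist, split, input time) can be omitted, in which case the corresponding specs simply stay null.
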
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfigProperties.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfigProperties.java
new file mode 100644
index 0000000000..f59cdeff42
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfigProperties.java
@@ -0,0 +1,105 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+/**
+ * Class for representing all property names used in thirdeye-hadoop jobs
+ */
+public enum ThirdEyeConfigProperties {
+
+  /** Pinot table name */
+  THIRDEYE_TABLE_NAME("thirdeye.table.name"),
+
+  /** Comma Separated dimension names */
+  THIRDEYE_DIMENSION_NAMES("thirdeye.dimension.names"),
+
+  /** Comma Separated dimension types */
+  THIRDEYE_DIMENSION_TYPES("thirdeye.dimension.types"),
+
+  /** Comma separated metric names */
+  THIRDEYE_METRIC_NAMES("thirdeye.metric.names"),
+
+  /** Comma separated metric types */
+  THIRDEYE_METRIC_TYPES("thirdeye.metric.types"),
+
+  /** Time column name */
+  THIRDEYE_TIMECOLUMN_NAME("thirdeye.timecolumn.name"),
+
+  /** Time input column type before aggregation (HOURS, DAYS etc) */
+  THIRDEYE_INPUT_TIMECOLUMN_TYPE("thirdeye.input.timecolumn.type"),
+
+  /** Time input bucket size before aggregation*/
+  THIRDEYE_INPUT_TIMECOLUMN_SIZE("thirdeye.input.timecolumn.size"),
+
+  /** Time format
+   * Can be either EPOCH (default) or SIMPLE_DATE_FORMAT:pattern, e.g. SIMPLE_DATE_FORMAT:yyyyMMdd */
+  THIRDEYE_INPUT_TIMECOLUMN_FORMAT("thirdeye.input.timecolumn.format"),
+
+  /** Time column type (HOURS, DAYS etc) */
+  THIRDEYE_TIMECOLUMN_TYPE("thirdeye.timecolumn.type"),
+
+  /** Time bucket size */
+  THIRDEYE_TIMECOLUMN_SIZE("thirdeye.timecolumn.size"),
+
+  /** Time format
+   * Can be either EPOCH (default) or SIMPLE_DATE_FORMAT:pattern, e.g. SIMPLE_DATE_FORMAT:yyyyMMdd */
+  THIRDEYE_TIMECOLUMN_FORMAT("thirdeye.timecolumn.format"),
+
+  /** Split threshold for star tree */
+  THIRDEYE_SPLIT_THRESHOLD("thirdeye.split.threshold"),
+
+  /** Split order for star tree */
+  THIRDEYE_SPLIT_ORDER("thirdeye.split.order"),
+
+  /** Comma separated metric names for threshold filtering */
+  THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES("thirdeye.topk.threshold.metric.names"),
+
+  /** Comma separated metric threshold values */
+  THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES("thirdeye.topk.metric.threshold.values"),
+
+  /** Comma separated dimension names for topk config */
+  THIRDEYE_TOPK_DIMENSION_NAMES("thirdeye.topk.dimension.names"),
+
+  /** Used by appending the dimension name at the end, e.g. thirdeye.topk.metrics.d1
+   * Comma separated metrics with topk specification for the given dimension */
+  THIRDEYE_TOPK_METRICS("thirdeye.topk.metrics"),
+
+  /** Used by appending the dimension name at the end, e.g. thirdeye.topk.kvalues.d1
+   * Comma separated top k values for the corresponding metrics of the given dimension */
+  THIRDEYE_TOPK_KVALUES("thirdeye.topk.kvalues"),
+
+  /** Comma separated dimension names which have whitelist */
+  THIRDEYE_WHITELIST_DIMENSION_NAMES("thirdeye.whitelist.dimension.names"),
+
+  /** Used by appending the dimension name at the end, e.g. thirdeye.whitelist.dimension.d1
+   * Comma separated list of values to whitelist for the given dimension */
+  THIRDEYE_WHITELIST_DIMENSION("thirdeye.whitelist.dimension"),
+
+  /** Used by appending the dimension name at the end, e.g. thirdeye.nonwhitelist.value.dimension.d1
+   * Value to be used for values which do not belong to the whitelist */
+  THIRDEYE_NONWHITELIST_VALUE_DIMENSION("thirdeye.nonwhitelist.value.dimension");
+
+  private final String name;
+
+  ThirdEyeConfigProperties(String name) {
+    this.name = name;
+  }
+
+  @Override
+  public String toString() {
+    return name;
+  }
+
+}
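
Several of the keys above are parameterized by dimension name (the base key joined with "." and the dimension). A hypothetical sketch of how the topk and whitelist keys line up:

    import java.util.Properties;

    public class TopkPropertiesExample {
      public static void main(String[] args) {
        Properties props = new Properties();
        // dimension and metric names below are hypothetical
        props.setProperty("thirdeye.topk.dimension.names", "country");
        props.setProperty("thirdeye.topk.metrics.country", "views,clicks"); // per-dimension key: base + "." + dimension
        props.setProperty("thirdeye.topk.kvalues.country", "10,5");         // one k per metric, same order
        props.setProperty("thirdeye.whitelist.dimension.names", "browser");
        props.setProperty("thirdeye.whitelist.dimension.browser", "chrome,firefox");
        props.setProperty("thirdeye.nonwhitelist.value.dimension.browser", "other_browser");
        props.list(System.out);
      }
    }

ThirdEyeConfig.fromProperties (previous file) parses these keys into a TopkWhitelistSpec.
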
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConstants.java
new file mode 100644
index 0000000000..e127ad124d
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConstants.java
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+public final class ThirdEyeConstants {
+  public static final String TOPK_VALUES_FILE = "topk_values";
+  public static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormat.forPattern("YYYY-MM-dd-HHmmss");
+  public static final String TOPK_DIMENSION_SUFFIX = "_topk";
+  public static final String OTHER = "other";
+  public static final String EMPTY_STRING = "";
+  public static final Number EMPTY_NUMBER = 0;
+  public static final Double EMPTY_DOUBLE = 0d;
+  public static final Float EMPTY_FLOAT = 0f;
+  public static final Integer EMPTY_INT = 0;
+  public static final Long EMPTY_LONG = 0L;
+  public static final Short EMPTY_SHORT = 0;
+  public static final String SEGMENT_JOINER = "_";
+  public static final String AUTO_METRIC_COUNT = "__COUNT";
+  public static final String FIELD_SEPARATOR = ",";
+  public static final String TAR_SUFFIX = ".tar.gz";
+  public static final String AVRO_SUFFIX = ".avro";
+  public static final String SDF_SEPARATOR = ":";
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TimeGranularity.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TimeGranularity.java
new file mode 100644
index 0000000000..61e06376c1
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TimeGranularity.java
@@ -0,0 +1,102 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * TimeGranularity class contains time unit and time size of the star tree time config
+ *
+ *  unit: the TimeUnit of the column
+ *  size: the bucket size of the time column
+ */
+public class TimeGranularity {
+  private static final int DEFAULT_TIME_SIZE = 1;
+
+  private int size = DEFAULT_TIME_SIZE;
+  private TimeUnit unit;
+
+  public TimeGranularity() {
+  }
+
+  public TimeGranularity(int size, TimeUnit unit) {
+    this.size = size;
+    this.unit = unit;
+  }
+
+  @JsonProperty
+  public int getSize() {
+    return size;
+  }
+
+  @JsonProperty
+  public TimeUnit getUnit() {
+    return unit;
+  }
+
+  public long toMillis() {
+    return toMillis(1);
+  }
+
+  /**
+   * Converts time expressed in (size x unit) buckets to millis
+   *
+   * @param time number of buckets
+   * @return equivalent duration in milliseconds
+   */
+  public long toMillis(long time) {
+    return unit.toMillis(time * size);
+  }
+
+  /**
+   * Converts millis to time unit
+   *
+   * e.g. If TimeGranularity is defined as 1 HOURS,
+   * and we invoke convertToUnit(1458284400000) (i.e. 2016-03-18 00:00:00)
+   * this method will return HOURS.convert(1458284400000, MILLISECONDS)/1 = 405079 hoursSinceEpoch
+   *
+   * If TimeGranularity is defined as 10 MINUTES,
+   * and we invoke convertToUnit(1458284400000) (i.e. 2016-03-18 00:00:00)
+   * this method will return MINUTES.convert(1458284400000, MILLISECONDS)/10 = 2430474 tenMinutesSinceEpoch
+   * @param millis epoch time in milliseconds
+   * @return number of (size x unit) buckets since epoch
+   */
+  public long convertToUnit(long millis) {
+    return unit.convert(millis, TimeUnit.MILLISECONDS) / size;
+  }
+
+  @Override
+  public String toString() {
+    return size + "-" + unit;
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(size, unit);
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (!(obj instanceof TimeGranularity)) {
+      return false;
+    }
+    TimeGranularity other = (TimeGranularity) obj;
+    return Objects.equals(other.size, this.size) && Objects.equals(other.unit, this.unit);
+  }
+}
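
As a quick check of the convertToUnit javadoc above, a small sketch exercising TimeGranularity with the same example timestamp:

    import java.util.concurrent.TimeUnit;

    import org.apache.pinot.thirdeye.hadoop.config.TimeGranularity;

    public class TimeGranularityExample {
      public static void main(String[] args) {
        TimeGranularity hourly = new TimeGranularity(1, TimeUnit.HOURS);
        // the example timestamp from the javadoc, in millis -> hours since epoch and back
        System.out.println(hourly.convertToUnit(1458284400000L)); // 405079
        System.out.println(hourly.toMillis(405079L));             // 1458284400000

        TimeGranularity tenMinutes = new TimeGranularity(10, TimeUnit.MINUTES);
        System.out.println(tenMinutes.convertToUnit(1458284400000L)); // 2430474
      }
    }
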
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TimeSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TimeSpec.java
new file mode 100644
index 0000000000..cac8ba3f0e
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TimeSpec.java
@@ -0,0 +1,60 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.pinot.common.data.TimeGranularitySpec.TimeFormat;
+
+import java.util.concurrent.TimeUnit;
+
+/** This class represents the time spec for thirdeye-hadoop jobs
+ *
+ *  columnName: name of the time column
+ *  timeGranularity: time granularity of the time column
+ *  timeFormat: time format (EPOCH or SIMPLE_DATE_FORMAT:pattern)
+ */
+public class TimeSpec {
+  private static final TimeGranularity DEFAULT_TIME_GRANULARITY = new TimeGranularity(1, TimeUnit.HOURS);
+  private static final String DEFAULT_TIME_FORMAT = TimeFormat.EPOCH.toString();
+
+  private String columnName;
+  private TimeGranularity timeGranularity = DEFAULT_TIME_GRANULARITY;
+  private String timeFormat = DEFAULT_TIME_FORMAT;
+
+  public TimeSpec() {
+  }
+
+  public TimeSpec(String columnName, TimeGranularity timeGranularity, String timeFormat) {
+    this.columnName = columnName;
+    this.timeGranularity = timeGranularity;
+    this.timeFormat = timeFormat;
+  }
+
+  @JsonProperty
+  public String getColumnName() {
+    return columnName;
+  }
+
+  @JsonProperty
+  public TimeGranularity getTimeGranularity() {
+    return timeGranularity;
+  }
+
+  @JsonProperty
+  public String getTimeFormat() {
+    return timeFormat;
+  }
+
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java
new file mode 100644
index 0000000000..c9a7baae91
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TopKDimensionToMetricsSpec.java
@@ -0,0 +1,60 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import java.util.Map;
+
+/**
+ * This class manages the topk config for a dimension,
+ * where the config can be defined on multiple metrics
+ *
+ *  dimensionName: the dimension this topk config applies to
+ *  topk: map of metric name to k value
+ */
+public class TopKDimensionToMetricsSpec {
+
+  String dimensionName;
+  Map<String, Integer> topk;
+
+  public TopKDimensionToMetricsSpec() {
+
+  }
+
+  public TopKDimensionToMetricsSpec(String dimensionName, Map<String, Integer> topk) {
+    this.dimensionName = dimensionName;
+    this.topk = topk;
+  }
+
+  public String getDimensionName() {
+    return dimensionName;
+  }
+
+  public void setDimensionName(String dimensionName) {
+    this.dimensionName = dimensionName;
+  }
+
+  public Map<String, Integer> getTopk() {
+    return topk;
+  }
+
+  public void setTopk(Map<String, Integer> topk) {
+    this.topk = topk;
+  }
+
+  public String toString() {
+    return "{ dimensionName : " + dimensionName + ", topk : " + topk + " }";
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TopkWhitelistSpec.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TopkWhitelistSpec.java
new file mode 100644
index 0000000000..40951c305e
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/config/TopkWhitelistSpec.java
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Config class to define topk and whitelist
+ *
+ *  threshold: dimension values which do not satisfy the metric thresholds will be ignored.
+ *  The metric total contributed by a dimension value is compared with the metric total across all records.
+ *
+ *  topKDimensionToMetricsSpec: list of dimensions, each with a map of metric to topk value.
+ *  Only the top k values of the dimension, ranked by that metric, will be kept.
+ *
+ *  whitelist: values to whitelist for a given dimension (dimension : whitelist values)
+ *
+ *  nonWhitelistValue: value to be used for a dimension value which is not in the whitelist
+ */
+public class TopkWhitelistSpec {
+
+  Map<String, Double> threshold;
+  List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec;
+  Map<String, List<String>> whitelist;
+  Map<String, String> nonWhitelistValue;
+
+  public TopkWhitelistSpec() {
+
+  }
+
+  public Map<String, Double> getThreshold() {
+    return threshold;
+  }
+
+  public void setThreshold(Map<String, Double> threshold) {
+    this.threshold = threshold;
+  }
+
+  public List<TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpec() {
+    return topKDimensionToMetricsSpec;
+  }
+
+  public void setTopKDimensionToMetricsSpec(List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec) {
+    this.topKDimensionToMetricsSpec = topKDimensionToMetricsSpec;
+  }
+
+  public Map<String, List<String>> getWhitelist() {
+    return whitelist;
+  }
+
+  public void setWhitelist(Map<String, List<String>> whitelist) {
+    this.whitelist = whitelist;
+  }
+
+  public Map<String, String> getNonWhitelistValue() {
+    return nonWhitelistValue;
+  }
+
+  public void setNonWhitelistValue(Map<String, String> nonWhitelistValue) {
+    this.nonWhitelistValue = nonWhitelistValue;
+  }
+
+}
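
A sketch of how a TopkWhitelistSpec might be assembled programmatically; the metric and dimension names are hypothetical, and the 0.01 threshold is assumed to be a fraction of the overall metric total:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.pinot.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
    import org.apache.pinot.thirdeye.hadoop.config.TopkWhitelistSpec;

    public class TopkWhitelistSpecExample {
      public static void main(String[] args) {
        // keep only the top 10 "country" values ranked by "views" and the top 5 ranked by "clicks"
        Map<String, Integer> topkByMetric = new HashMap<>();
        topkByMetric.put("views", 10);
        topkByMetric.put("clicks", 5);

        TopkWhitelistSpec spec = new TopkWhitelistSpec();
        spec.setThreshold(Collections.singletonMap("views", 0.01));
        spec.setTopKDimensionToMetricsSpec(
            Arrays.asList(new TopKDimensionToMetricsSpec("country", topkByMetric)));
        spec.setWhitelist(Collections.singletonMap("browser", Arrays.asList("chrome", "firefox")));
        spec.setNonWhitelistValue(Collections.singletonMap("browser", "other_browser"));

        System.out.println(spec.getTopKDimensionToMetricsSpec());
      }
    }
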
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java
new file mode 100644
index 0000000000..2bde5cb126
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConfig.java
@@ -0,0 +1,135 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation;
+
+import org.apache.pinot.thirdeye.hadoop.config.DimensionSpec;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+import org.apache.pinot.thirdeye.hadoop.config.MetricSpec;
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.config.TopkWhitelistSpec;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This class contains the config needed by the derived column transformation phase
+ * and the methods to obtain that config from the ThirdEyeConfig
+ */
+public class DerivedColumnTransformationPhaseConfig {
+  private List<String> dimensionNames;
+  private List<DimensionType> dimensionTypes;
+  private List<String> metricNames;
+  private List<MetricType> metricTypes;
+  private String timeColumnName;
+  private Map<String, List<String>> whitelist;
+  private Map<String, String> nonWhitelistValue;
+
+
+  public DerivedColumnTransformationPhaseConfig() {
+
+  }
+
+  /**
+   * @param dimensionNames dimension column names
+   * @param dimensionTypes dimension column types, in the same order as dimensionNames
+   * @param metricNames metric column names
+   * @param metricTypes metric column types, in the same order as metricNames
+   * @param timeColumnName name of the time column
+   * @param whitelist map of dimension name to whitelisted values
+   * @param nonWhitelistValue map of dimension name to the value used for non-whitelisted values
+   */
+  public DerivedColumnTransformationPhaseConfig(List<String> dimensionNames, List<DimensionType> dimensionTypes,
+      List<String> metricNames, List<MetricType> metricTypes, String timeColumnName,
+      Map<String, List<String>> whitelist, Map<String, String> nonWhitelistValue) {
+    super();
+    this.dimensionNames = dimensionNames;
+    this.dimensionTypes = dimensionTypes;
+    this.metricNames = metricNames;
+    this.metricTypes = metricTypes;
+    this.timeColumnName = timeColumnName;
+    this.whitelist = whitelist;
+    this.nonWhitelistValue = nonWhitelistValue;
+  }
+
+  public List<String> getDimensionNames() {
+    return dimensionNames;
+  }
+
+  public List<DimensionType> getDimensionTypes() {
+    return dimensionTypes;
+  }
+
+  public List<String> getMetricNames() {
+    return metricNames;
+  }
+
+  public List<MetricType> getMetricTypes() {
+    return metricTypes;
+  }
+
+  public String getTimeColumnName() {
+    return timeColumnName;
+  }
+
+  public Map<String, List<String>> getWhitelist() {
+    return whitelist;
+  }
+
+  public Map<String, String> getNonWhitelistValue() {
+    return nonWhitelistValue;
+  }
+
+  public static DerivedColumnTransformationPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
+
+    // metrics
+    List<String> metricNames = new ArrayList<>(config.getMetrics().size());
+    List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
+    for (MetricSpec spec : config.getMetrics()) {
+      metricNames.add(spec.getName());
+      metricTypes.add(spec.getType());
+    }
+
+    // dimensions
+    List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
+    List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
+    for (DimensionSpec spec : config.getDimensions()) {
+      dimensionNames.add(spec.getName());
+      dimensionTypes.add(spec.getDimensionType());
+    }
+
+    // time
+    String timeColumnName = config.getTime().getColumnName();
+
+    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
+    Map<String, List<String>> whitelist = new HashMap<>();
+
+    // topkwhitelist
+    if (topKWhitelist != null && topKWhitelist.getWhitelist() != null) {
+      whitelist.putAll(topKWhitelist.getWhitelist());
+    }
+
+    Map<String, String> nonWhitelistValueMap = new HashMap<>();
+    if (topKWhitelist != null && topKWhitelist.getNonWhitelistValue() != null) {
+      nonWhitelistValueMap.putAll(topKWhitelist.getNonWhitelistValue());
+    }
+
+    return new DerivedColumnTransformationPhaseConfig(dimensionNames, dimensionTypes, metricNames, metricTypes,
+        timeColumnName, whitelist, nonWhitelistValueMap);
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java
new file mode 100644
index 0000000000..770fd057f8
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseConstants.java
@@ -0,0 +1,38 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation;
+
+/**
+ * This class contains the properties to be set for the derived column transformation phase
+ */
+public enum DerivedColumnTransformationPhaseConstants {
+  DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH("derived.column.transformation.phase.input.path"),
+  DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH("derived.column.transformation.phase.topk.path"),
+  DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH("derived.column.transformation.phase.output.path"),
+  DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA("derived.column.transformation.phase.output.schema"),
+  DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG("derived.column.transformation.phase.thirdeye.config");
+
+  private final String name;
+
+  DerivedColumnTransformationPhaseConstants(String name) {
+    this.name = name;
+  }
+
+  @Override
+  public String toString() {
+    return name;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java
new file mode 100644
index 0000000000..e81fe7af29
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationPhaseJob.java
@@ -0,0 +1,403 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation;
+
+import static org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH;
+import static org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH;
+import static org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA;
+import static org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG;
+import static org.apache.pinot.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH;
+
+import java.io.DataInput;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.pinot.thirdeye.hadoop.config.DimensionSpec;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+import org.apache.pinot.thirdeye.hadoop.config.MetricSpec;
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfigProperties;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+import org.apache.pinot.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
+import org.apache.pinot.thirdeye.hadoop.config.TopkWhitelistSpec;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.topk.TopKDimensionValues;
+import org.apache.pinot.thirdeye.hadoop.util.ThirdeyeAvroUtils;
+
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
+import org.apache.avro.SchemaBuilder.BaseFieldTypeBuilder;
+import org.apache.avro.SchemaBuilder.FieldAssembler;
+import org.apache.avro.SchemaBuilder.RecordBuilder;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.avro.mapreduce.AvroKeyOutputFormat;
+import org.apache.avro.mapreduce.AvroMultipleOutputs;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * This phase adds a new column for every column that has a topk config.
+ * The new column is called "column_topk" and contains only the topk values plus any whitelist values,
+ * while "column" contains all values with the whitelist applied.
+ * In the topk column, all non-topk values are replaced by "other".
+ * In the original column, all non-whitelist values are replaced by the defaultOtherValue specified in DimensionType,
+ * which can be overridden with a config such as thirdeye.nonwhitelist.value.dimension.d1=x
+ */
+public class DerivedColumnTransformationPhaseJob extends Configured {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DerivedColumnTransformationPhaseJob.class);
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  private String name;
+  private Properties props;
+
+  /**
+   * @param name job name
+   * @param props job properties
+   */
+  public DerivedColumnTransformationPhaseJob(String name, Properties props) {
+    super(new Configuration());
+    this.name = name;
+    this.props = props;
+  }
+
+  public static class DerivedColumnTransformationPhaseMapper
+      extends Mapper<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> {
+
+    private Schema outputSchema;
+    private ThirdEyeConfig thirdeyeConfig;
+    private DerivedColumnTransformationPhaseConfig config;
+    private List<String> dimensionsNames;
+    private List<DimensionType> dimensionsTypes;
+    private List<String> metricNames;
+    private List<MetricType> metricTypes;
+    private TopKDimensionValues topKDimensionValues;
+    private Map<String, Set<String>> topKDimensionsMap;
+    private Map<String, List<String>> whitelist;
+    private Map<String, String> nonWhitelistValueMap;
+    private String timeColumnName;
+
+    private AvroMultipleOutputs avroMultipleOutputs;
+    String inputFileName;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+      LOGGER.info("DerivedColumnTransformationPhaseJob.DerivedColumnTransformationPhaseMapper.setup()");
+      Configuration configuration = context.getConfiguration();
+      FileSystem fs = FileSystem.get(configuration);
+
+      FileSplit fileSplit = (FileSplit) context.getInputSplit();
+      inputFileName = fileSplit.getPath().getName();
+      inputFileName = inputFileName.substring(0, inputFileName.lastIndexOf(ThirdEyeConstants.AVRO_SUFFIX));
+      LOGGER.info("split name:" + inputFileName);
+
+      thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
+      config = DerivedColumnTransformationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
+      dimensionsNames = config.getDimensionNames();
+      dimensionsTypes = config.getDimensionTypes();
+      metricNames = config.getMetricNames();
+      metricTypes = config.getMetricTypes();
+      timeColumnName = config.getTimeColumnName();
+      whitelist = config.getWhitelist();
+      nonWhitelistValueMap = config.getNonWhitelistValue();
+
+      outputSchema = new Schema.Parser().parse(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString()));
+
+      Path topKPath = new Path(configuration.get(DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString())
+          + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE);
+      topKDimensionValues = new TopKDimensionValues();
+      if (fs.exists(topKPath)) {
+        FSDataInputStream topkValuesStream = fs.open(topKPath);
+        topKDimensionValues = OBJECT_MAPPER.readValue((DataInput) topkValuesStream, TopKDimensionValues.class);
+        topkValuesStream.close();
+      }
+      topKDimensionsMap = topKDimensionValues.getTopKDimensions();
+
+      avroMultipleOutputs = new AvroMultipleOutputs(context);
+    }
+
+
+    @Override
+    public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
+        throws IOException, InterruptedException {
+
+      // input record
+      GenericRecord inputRecord = key.datum();
+
+      // output record
+      GenericRecord outputRecord = new Record(outputSchema);
+
+      // dimensions
+      for (int i = 0; i < dimensionsNames.size(); i++) {
+
+        String dimensionName = dimensionsNames.get(i);
+        DimensionType dimensionType = dimensionsTypes.get(i);
+        Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
+        String dimensionValueStr = String.valueOf(dimensionValue);
+
+
+        // add original dimension value with whitelist applied
+        Object whitelistDimensionValue = dimensionValue;
+        if (whitelist != null) {
+          List<String> whitelistDimensions = whitelist.get(dimensionName);
+          if (CollectionUtils.isNotEmpty(whitelistDimensions)) {
+            // whitelist config exists for this dimension but value not present in whitelist
+            if (!whitelistDimensions.contains(dimensionValueStr)) {
+              whitelistDimensionValue = dimensionType.getValueFromString(nonWhitelistValueMap.get(dimensionName));
+            }
+          }
+        }
+        outputRecord.put(dimensionName, whitelistDimensionValue);
+
+        // add column for topk, if topk config exists for that column, plus any whitelist values
+        if (topKDimensionsMap.containsKey(dimensionName)) {
+          Set<String> topKDimensionValues = topKDimensionsMap.get(dimensionName);
+          // if topk config exists for that dimension
+          if (CollectionUtils.isNotEmpty(topKDimensionValues)) {
+            String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
+            Object topkDimensionValue = dimensionValue;
+            // topk config exists for this dimension, but value not present in topk or whitelist
+            if (!topKDimensionValues.contains(dimensionValueStr) &&
+                (whitelist == null || whitelist.get(dimensionName) == null
+                || !whitelist.get(dimensionName).contains(dimensionValueStr))) {
+              topkDimensionValue = ThirdEyeConstants.OTHER;
+            }
+            outputRecord.put(topkDimensionName, String.valueOf(topkDimensionValue));
+          }
+        }
+      }
+
+      // metrics
+      for (int i = 0; i < metricNames.size(); i ++) {
+        String metricName = metricNames.get(i);
+        MetricType metricType = metricTypes.get(i);
+        outputRecord.put(metricName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName, metricType));
+      }
+
+      // time
+      outputRecord.put(timeColumnName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName));
+
+      AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
+      avroMultipleOutputs.write(outputKey, NullWritable.get(), inputFileName);
+    }
+
+    @Override
+    public void cleanup(Context context) throws IOException, InterruptedException {
+      avroMultipleOutputs.close();
+    }
+
+
+  }
+
+  public Job run() throws Exception {
+    Job job = Job.getInstance(getConf());
+    job.setJobName(name);
+    job.setJarByClass(DerivedColumnTransformationPhaseJob.class);
+
+    Configuration configuration = job.getConfiguration();
+    FileSystem fs = FileSystem.get(configuration);
+
+    // Input Path
+    String inputPathDir = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_INPUT_PATH);
+    LOGGER.info("Input path dir: " + inputPathDir);
+    for (String inputPath : inputPathDir.split(",")) {
+      LOGGER.info("Adding input:" + inputPath);
+      Path input = new Path(inputPath);
+      FileInputFormat.addInputPath(job, input);
+    }
+
+    // Topk path
+    String topkPath = getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH);
+    LOGGER.info("Topk path : " + topkPath);
+
+    // Output path
+    Path outputPath = new Path(getAndSetConfiguration(configuration, DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH));
+    LOGGER.info("Output path dir: " + outputPath.toString());
+    if (fs.exists(outputPath)) {
+      fs.delete(outputPath, true);
+    }
+    FileOutputFormat.setOutputPath(job, outputPath);
+
+    // Schema
+    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
+    LOGGER.info("Schema : {}", avroSchema.toString(true));
+
+    // ThirdEyeConfig
+    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
+    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
+    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
+    job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(),
+        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
+    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
+
+    // New schema
+    Schema outputSchema = newSchema(thirdeyeConfig);
+    job.getConfiguration().set(DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(), outputSchema.toString());
+
+    // Map config
+    job.setMapperClass(DerivedColumnTransformationPhaseMapper.class);
+    job.setInputFormatClass(AvroKeyInputFormat.class);
+    job.setMapOutputKeyClass(AvroKey.class);
+    job.setMapOutputValueClass(NullWritable.class);
+    AvroJob.setOutputKeySchema(job, outputSchema);
+    LazyOutputFormat.setOutputFormatClass(job, AvroKeyOutputFormat.class);
+    AvroMultipleOutputs.addNamedOutput(job, "avro", AvroKeyOutputFormat.class, outputSchema);
+
+    job.setNumReduceTasks(0);
+
+    job.waitForCompletion(true);
+
+    return job;
+  }
+
+
+  public Schema newSchema(ThirdEyeConfig thirdeyeConfig) {
+    Schema outputSchema = null;
+
+    Set<String> topKTransformDimensionSet = new HashSet<>();
+    TopkWhitelistSpec topkWhitelist = thirdeyeConfig.getTopKWhitelist();
+
+    // gather topk columns
+    if (topkWhitelist != null) {
+      List<TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecs = topkWhitelist.getTopKDimensionToMetricsSpec();
+      if (topKDimensionToMetricsSpecs != null) {
+        for (TopKDimensionToMetricsSpec topKDimensionToMetricsSpec : topKDimensionToMetricsSpecs) {
+          topKTransformDimensionSet.add(topKDimensionToMetricsSpec.getDimensionName());
+        }
+      }
+    }
+    RecordBuilder<Schema> recordBuilder = SchemaBuilder.record(thirdeyeConfig.getCollection());
+    FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
+
+    // add dimension columns, plus a "<dimension>_topk" column for dimensions with a topk config
+    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
+      String dimensionName = dimensionSpec.getName();
+      DimensionType dimensionType = dimensionSpec.getDimensionType();
+      BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(dimensionName).type().nullable();
+
+      switch (dimensionType) {
+      case DOUBLE:
+        fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
+        break;
+      case FLOAT:
+        fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
+        break;
+      case INT:
+      case SHORT:
+        fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
+        break;
+      case LONG:
+        fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
+        break;
+      case STRING:
+        fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
+        break;
+      default:
+        throw new IllegalArgumentException("Unsupported dimensionType " + dimensionType);
+      }
+      if (topKTransformDimensionSet.contains(dimensionName)) {
+        fieldAssembler = fieldAssembler.name(dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX).type().nullable().stringType().noDefault();
+      }
+    }
+
+    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
+      String metric = metricSpec.getName();
+      MetricType metricType = metricSpec.getType();
+      BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(metric).type().nullable();
+
+      switch (metricType) {
+        case SHORT:
+        case INT:
+          fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
+          break;
+        case FLOAT:
+          fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
+          break;
+        case DOUBLE:
+          fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
+          break;
+        case LONG:
+        default:
+          fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
+      }
+    }
+
+    String timeColumnName = thirdeyeConfig.getTime().getColumnName();
+    fieldAssembler = fieldAssembler.name(timeColumnName).type().longType().noDefault();
+
+    outputSchema = fieldAssembler.endRecord();
+    LOGGER.info("New schema {}", outputSchema.toString(true));
+
+    return outputSchema;
+  }
+
+  private String getAndSetConfiguration(Configuration configuration,
+      DerivedColumnTransformationPhaseConstants constant) {
+    String value = getAndCheck(constant.toString());
+    configuration.set(constant.toString(), value);
+    return value;
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("usage: config.properties");
+    }
+
+    Properties props = new Properties();
+    props.load(new FileInputStream(args[0]));
+    DerivedColumnTransformationPhaseJob job = new DerivedColumnTransformationPhaseJob("derived_column_transformation_job", props);
+    job.run();
+  }
+
+}
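
To make the column / column_topk behaviour from the class javadoc concrete, a simplified, self-contained sketch of the replacement rule (plain sets stand in for the topk phase output and the ThirdEyeConfig whitelist; all values are hypothetical):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class DerivedColumnRuleSketch {
      public static void main(String[] args) {
        // assume the topk phase selected {us, in} for "country", the whitelist for "country"
        // is {us, in, fr}, and the configured non-whitelist value is "rest_of_world"
        Set<String> topk = new HashSet<>(Arrays.asList("us", "in"));
        List<String> whitelist = Arrays.asList("us", "in", "fr");
        String nonWhitelistValue = "rest_of_world";

        for (String value : Arrays.asList("us", "fr", "br")) {
          // "country" keeps the value only if it is whitelisted
          String country = whitelist.contains(value) ? value : nonWhitelistValue;
          // "country_topk" keeps topk or whitelisted values, everything else becomes "other"
          String countryTopk = (topk.contains(value) || whitelist.contains(value)) ? value : "other";
          System.out.println(value + " -> country=" + country + ", country_topk=" + countryTopk);
        }
        // us -> country=us, country_topk=us
        // fr -> country=fr, country_topk=fr
        // br -> country=rest_of_world, country_topk=other
      }
    }
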
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DefaultJoinConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DefaultJoinConfigUDF.java
new file mode 100644
index 0000000000..cfaf36a76b
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DefaultJoinConfigUDF.java
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import org.apache.hadoop.mapreduce.Job;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DefaultJoinConfigUDF implements JoinConfigUDF {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultJoinConfigUDF.class);
+
+  @Override
+  public void setJoinConfig(Job job) {
+
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java
new file mode 100644
index 0000000000..f84ba3646f
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DefaultJoinKeyExtractor.java
@@ -0,0 +1,54 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import java.util.Map;
+
+import org.apache.avro.generic.GenericRecord;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DefaultJoinKeyExtractor implements JoinKeyExtractor {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultJoinKeyExtractor.class);
+
+  private Map<String, String> joinKeyMap;
+  private String defaultJoinKey;
+
+  public DefaultJoinKeyExtractor(Map<String, String> params) {
+    this.joinKeyMap = params;
+    this.defaultJoinKey = params.get("defaultJoinKey");
+  }
+
+  @Override
+  public String extractJoinKey(String sourceName, GenericRecord record) {
+
+    String joinKey = defaultJoinKey;
+    if (joinKeyMap != null && joinKeyMap.containsKey(sourceName)) {
+      joinKey = joinKeyMap.get(sourceName);
+    }
+    String ret = "INVALID";
+    if (joinKey != null) {
+      Object object = record.get(joinKey);
+      if (object != null) {
+        ret = object.toString();
+      }
+    }
+    LOGGER.info("source:{} JoinKey:{} value:{}", sourceName, joinKey, ret);
+    return ret;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java
new file mode 100644
index 0000000000..6b21d98d31
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/DelegatingAvroKeyInputFormat.java
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.avro.mapreduce.AvroKeyRecordReader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+
+import org.codehaus.jackson.type.TypeReference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
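+/**
+ * Input format that picks the Avro reader schema per input split: it resolves the source name for
+ * the split's path via the "schema.path.mapping" setting and then looks up that source's schema
+ * JSON in "schema.json.mapping" (both written into the job configuration by JoinPhaseJob).
+ */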
+public class DelegatingAvroKeyInputFormat<T> extends AvroKeyInputFormat<T> {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DelegatingAvroKeyInputFormat.class);
+  private static TypeReference MAP_STRING_STRING_TYPE = new TypeReference<Map<String, String>>() {
+  };
+
+  public org.apache.hadoop.mapreduce.RecordReader<org.apache.avro.mapred.AvroKey<T>, NullWritable> createRecordReader(
+      InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
+    LOGGER.info("DelegatingAvroKeyInputFormat.createRecordReader()  for split:{}", split);
+    FileSplit fileSplit = (FileSplit) split;
+    Configuration configuration = context.getConfiguration();
+    String sourceName = getSourceNameFromPath(fileSplit, configuration);
+    LOGGER.info("Source Name for path {} : {}", fileSplit.getPath(), sourceName);
+    Map<String, String> schemaJSONMapping = new ObjectMapper()
+        .readValue(configuration.get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
+
+    LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
+
+    String sourceSchemaJSON = schemaJSONMapping.get(sourceName);
+
+    Schema schema = new Schema.Parser().parse(sourceSchemaJSON);
+    return new AvroKeyRecordReader<T>(schema);
+  }
+
+  public static String getSourceNameFromPath(FileSplit fileSplit, Configuration configuration)
+      throws IOException, JsonParseException, JsonMappingException {
+    String content = configuration.get("schema.path.mapping");
+    Map<String, String> schemaPathMapping =
+        new ObjectMapper().readValue(content, MAP_STRING_STRING_TYPE);
+    LOGGER.info("Schema Path Mapping: {}", schemaPathMapping);
+
+    String sourceName = null;
+    for (String path : schemaPathMapping.keySet()) {
+      if (fileSplit.getPath().toString().indexOf(path) > -1) {
+        sourceName = schemaPathMapping.get(path);
+        break;
+      }
+    }
+    return sourceName;
+  }
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/GenericJoinUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/GenericJoinUDF.java
new file mode 100644
index 0000000000..ec526e692f
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/GenericJoinUDF.java
@@ -0,0 +1,85 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.pinot.thirdeye.hadoop.join.GenericJoinUDFConfig.Field;
+
+public class GenericJoinUDF implements JoinUDF {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(GenericJoinUDF.class);
+  private GenericJoinUDFConfig config;
+  private Schema outputSchema;
+  private List<Field> fields;
+
+  public GenericJoinUDF(Map<String, String> params) {
+    LOGGER.info("Initializing GenericJoinUDF with params:" + params);
+    this.config = new GenericJoinUDFConfig(params);
+    fields = config.getFields();
+  }
+
+  @Override
+  public void init(Schema outputSchema) {
+    this.outputSchema = outputSchema;
+  }
+
+  /**
+   * Trivial implementation of a generic join udf. Assumes the data type is the
+   * same in source and output.
+   */
+  @Override
+  public List<GenericRecord> performJoin(Object joinKeyVal,
+      Map<String, List<GenericRecord>> joinInput) {
+
+    List<GenericRecord> outputRecords = new ArrayList<GenericRecord>();
+    GenericRecord outputRecord = new GenericData.Record(outputSchema);
+    for (Field field : fields) {
+      Object value = null;
+      // try to find the field in one of the source events, break out as soon as
+      // we find a non null value
+      for (String source : field.sourceEvents) {
+        List<GenericRecord> list = joinInput.get(source);
+        if (list != null && list.size() >= 1) {
+          for (GenericRecord record : list) {
+            value = record.get(field.name);
+            if (value != null) {
+              break;
+            }
+          }
+        }
+        if (value != null) {
+          break;
+        }
+      }
+      if (value != null) {
+        outputRecord.put(field.name, value);
+      }
+    }
+    outputRecords.add(outputRecord);
+    return outputRecords;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/GenericJoinUDFConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/GenericJoinUDFConfig.java
new file mode 100644
index 0000000000..b36b4a9f7f
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/GenericJoinUDFConfig.java
@@ -0,0 +1,100 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+
+public class GenericJoinUDFConfig {
+
+  List<Field> fields;
+
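+  /**
+   * Parses the UDF params. An illustrative (hypothetical) params layout, matching the keys read
+   * below, would be:
+   *   field.names=userId,country
+   *   userId.sources=impressions,clicks
+   *   userId.type=string
+   * i.e. a comma separated field list plus, per field, its source events and an optional Avro type.
+   */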
+  public GenericJoinUDFConfig(Map<String, String> params) {
+    fields = new ArrayList<Field>();
+    String fieldNamesString = params.get("field.names");
+    String[] split = fieldNamesString.split(",");
+    for (String fieldName : split) {
+      Field field = new Field();
+      field.name = fieldName;
+      String type = params.get(fieldName + ".type");
+      if (type != null) {
+        field.type = Schema.Type.valueOf(type.toUpperCase());
+      }
+      field.sourceEvents = new ArrayList<String>();
+      String[] fieldSources = params.get(fieldName + ".sources").split(",");
+      for (String fieldSource : fieldSources) {
+        field.sourceEvents.add(fieldSource.trim());
+      }
+      fields.add(field);
+    }
+  }
+
+  public List<Field> getFields() {
+    return fields;
+  }
+
+  public void setFields(List<Field> fields) {
+    this.fields = fields;
+  }
+
+  /*
+   * For now only the field name and source names are supported. It would be nice to support
+   * data type conversion and transform functions in the future.
+   */
+  public static class Field {
+    String name;
+    List<String> sourceEvents;
+    Schema.Type type;
+    List<String> tranformFunc;
+
+    public String getName() {
+      return name;
+    }
+
+    public void setName(String name) {
+      this.name = name;
+    }
+
+    public Type getType() {
+      return type;
+    }
+
+    public void setType(Type type) {
+      this.type = type;
+    }
+
+    public List<String> getSourceEvents() {
+      return sourceEvents;
+    }
+
+    public void setSourceEvents(List<String> sourceEvents) {
+      this.sourceEvents = sourceEvents;
+    }
+
+    public List<String> getTranformFunc() {
+      return tranformFunc;
+    }
+
+    public void setTranformFunc(List<String> tranformFunc) {
+      this.tranformFunc = tranformFunc;
+    }
+  }
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinConfigUDF.java
new file mode 100644
index 0000000000..46ad2d6d0c
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinConfigUDF.java
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Simple interface to customize the Hadoop join job configuration (for example to register
+ * distributed cache entries) before the job is submitted.
+ */
+public interface JoinConfigUDF {
+
+  /**
+   * @param job the join job whose configuration should be customized
+   */
+  void setJoinConfig(Job job);
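+
+  // Illustrative sketch (hypothetical implementation): a custom JoinConfigUDF might, for example,
+  // tune job settings before submission:
+  //
+  //   public class MyJoinConfigUDF implements JoinConfigUDF {
+  //     @Override
+  //     public void setJoinConfig(Job job) {
+  //       job.getConfiguration().setInt("mapreduce.job.reduces", 20);
+  //     }
+  //   }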
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinKeyExtractor.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinKeyExtractor.java
new file mode 100644
index 0000000000..c519334c4c
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinKeyExtractor.java
@@ -0,0 +1,32 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import org.apache.avro.generic.GenericRecord;
+
+/**
+ * Simple interface to extract the joinKey from a Generic Record
+ */
+public interface JoinKeyExtractor {
+  /**
+   * @param sourceName name of the source
+   * @param record record from which the join key is extracted; the join key value is expected
+   *          to be a string
+   * @return the join key value of the given record, as a string
+   */
+  String extractJoinKey(String sourceName, GenericRecord record);
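+
+  // Illustrative note: DefaultJoinKeyExtractor in this package is the bundled implementation; it
+  // is constructed reflectively with a Map<String, String> of params mapping each source name to
+  // its join key field (with an optional "defaultJoinKey" fallback). Source and field names below
+  // are hypothetical:
+  //   impressions -> memberId, clicks -> memberId, defaultJoinKey -> memberId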
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinPhaseConstants.java
new file mode 100644
index 0000000000..aec7921783
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinPhaseConstants.java
@@ -0,0 +1,42 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+public enum JoinPhaseConstants {
+  // SCHEMA AND INPUT are configured per source; the actual property key is prefixed with the
+  // source name, e.g. {source}.join.input.path
+  JOIN_INPUT_SCHEMA("join.input.schema"), // one schema for each source
+  JOIN_INPUT_PATH("join.input.path"), // one input for each source
+  JOIN_OUTPUT_PATH("join.output.path"),
+  JOIN_OUTPUT_SCHEMA("join.output.schema"),
+  JOIN_SOURCE_NAMES("join.source.names"), // comma separated list of sources
+  JOIN_CONFIG_UDF_CLASS("join.config.udf.class"),
+  JOIN_UDF_CLASS("join.udf.class"),
+  JOIN_KEY_EXTRACTOR_CLASS("join.key.extractor.class"),
+  JOIN_KEY_EXTRACTOR_CONFIG("join.key.extractor.config"), // one for each source
+  JOIN_UDF_CONFIG("join.udf.config"); // one for each source
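+
+  // Illustrative properties snippet (source names and paths are hypothetical; per-source keys are
+  // prefixed with the source name as noted above):
+  //   join.source.names=impressions,clicks
+  //   impressions.join.input.path=/data/impressions
+  //   impressions.join.input.schema=/schemas/impressions.avsc
+  //   join.output.path=/data/joined
+  //   join.output.schema=/schemas/joined.avsc
+  //   join.key.extractor.class=org.apache.pinot.thirdeye.hadoop.join.DefaultJoinKeyExtractor
+  //   join.udf.class=org.apache.pinot.thirdeye.hadoop.join.GenericJoinUDF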
+
+  String name;
+
+  JoinPhaseConstants(String name) {
+    this.name = name;
+  }
+
+  public String toString() {
+    return name;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinPhaseJob.java
new file mode 100644
index 0000000000..912513606e
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinPhaseJob.java
@@ -0,0 +1,394 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import static org.apache.pinot.thirdeye.hadoop.join.JoinPhaseConstants.*;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.lang.reflect.Constructor;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.avro.mapreduce.AvroKeyOutputFormat;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.CounterGroup;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Lists;
+
+/**
+ * This is a generic join job that can be used to prepare the data for ThirdEye. Many teams just
+ * need a way to join multiple data sets into one. Today this is often done with a Pig script,
+ * which is highly inefficient since it performs a pair-wise join. The idea is as follows: there
+ * are N named sources, with one join key common across all of them (an illustrative invocation
+ * is sketched in the comment below). <br/>
+ * S1: join key s1_key <br/>
+ * S2: join key s2_key <br/>
+ * ... <br/>
+ * SN: join key sn_key<br/>
+ */
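+// Illustrative invocation (jar and properties file names are hypothetical); the properties file
+// uses the keys defined in JoinPhaseConstants:
+//   hadoop jar thirdeye-hadoop.jar org.apache.pinot.thirdeye.hadoop.join.JoinPhaseJob join.properties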
+public class JoinPhaseJob extends Configured {
+  private static final Logger LOGGER = LoggerFactory.getLogger(JoinPhaseJob.class);
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  private String name;
+  private Properties props;
+
+  public JoinPhaseJob(String name, Properties props) {
+    super(new Configuration());
+    this.name = name;
+    this.props = props;
+  }
+
+  public static class GenericJoinMapper
+      extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
+    String sourceName;
+    JoinKeyExtractor joinKeyExtractor;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+
+      LOGGER.info("GenericAvroJoinJob.GenericJoinMapper.setup()");
+      FileSplit fileSplit = (FileSplit) context.getInputSplit();
+      LOGGER.info("split name:" + fileSplit.toString());
+      Configuration configuration = context.getConfiguration();
+
+      try {
+        sourceName = DelegatingAvroKeyInputFormat.getSourceNameFromPath(fileSplit, configuration);
+        LOGGER.info("Input: {} belongs to Source:{}", fileSplit, sourceName);
+        String joinKeyExtractorClass = configuration.get(JOIN_KEY_EXTRACTOR_CLASS.toString());
+
+        Map<String, String> params = new HashMap<>();
+        List<String> sourceNames = Lists.newArrayList(configuration.get(JOIN_SOURCE_NAMES.toString()).split(","));
+        for (String sourceName : sourceNames) {
+          String joinKeyExtractorConfig = configuration.get(sourceName + "." + JOIN_KEY_EXTRACTOR_CONFIG.toString());
+          if (StringUtils.isNotBlank(joinKeyExtractorConfig)) {
+            params.put(sourceName, joinKeyExtractorConfig);
+          }
+        }
+        LOGGER.info("Initializing JoinKeyExtractorClass:{} with params:{}", joinKeyExtractorClass, params);
+        Constructor<?> constructor = Class.forName(joinKeyExtractorClass).getConstructor(Map.class);
+        joinKeyExtractor = (JoinKeyExtractor) constructor.newInstance(params);
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+
+    }
+
+    @Override
+    public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
+        throws IOException, InterruptedException {
+      GenericRecord record = recordWrapper.datum();
+      MapOutputValue mapOutputValue = new MapOutputValue(record.getSchema().getName(), record);
+      String joinKeyValue = joinKeyExtractor.extractJoinKey(sourceName, record);
+      LOGGER.info("Join Key:{}", joinKeyValue);
+
+      if (!"INVALID".equals(joinKeyValue)) {
+        context.write(new BytesWritable(joinKeyValue.toString().getBytes()),
+            new BytesWritable(mapOutputValue.toBytes()));
+      }
+    }
+
+  }
+
+  public static class GenericJoinReducer
+      extends Reducer<BytesWritable, BytesWritable, AvroKey<GenericRecord>, NullWritable> {
+
+    String statOutputDir;
+    private FileSystem fileSystem;
+    private static TypeReference MAP_STRING_STRING_TYPE = new TypeReference<Map<String, String>>() {
+    };
+    private Map<String, Schema> schemaMap = new HashMap<String, Schema>();
+    private JoinUDF joinUDF;
+    private Map<String, AtomicInteger> countersMap = new HashMap<String, AtomicInteger>();
+    private List<String> sourceNames;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+      Configuration configuration = context.getConfiguration();
+      fileSystem = FileSystem.get(configuration);
+
+      try {
+
+        Map<String, String> schemaJSONMapping = new ObjectMapper().readValue(
+            context.getConfiguration().get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
+
+        LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
+        for (String sourceName : schemaJSONMapping.keySet()) {
+          Schema schema = new Schema.Parser().parse(schemaJSONMapping.get(sourceName));
+          schemaMap.put(sourceName, schema);
+        }
+        sourceNames = Lists.newArrayList(configuration.get(JOIN_SOURCE_NAMES.toString()).split(","));
+        String joinUDFClass = configuration.get(JOIN_UDF_CLASS.toString());
+        Map<String, String> params = new HashMap<>();
+        for (String sourceName : sourceNames) {
+          String joinUdfConfig = configuration.get(sourceName + "." + JOIN_UDF_CONFIG.toString());
+          if (StringUtils.isNotBlank(joinUdfConfig)) {
+            params.put(sourceName, joinUdfConfig);
+          }
+        }
+
+        Constructor<?> constructor = Class.forName(joinUDFClass).getConstructor(Map.class);
+        LOGGER.info("Initializing JoinUDFClass:{} with params:{}", joinUDFClass, params);
+        joinUDF = (JoinUDF) constructor.newInstance(params);
+        String outputSchemaPath = configuration.get(JOIN_OUTPUT_SCHEMA.toString());
+        // Avro schema
+        Schema.Parser parser = new Schema.Parser();
+        Schema outputSchema = parser.parse(fileSystem.open(new Path(outputSchemaPath)));
+        LOGGER.info("Setting outputschema:{}", outputSchema);
+        joinUDF.init(outputSchema);
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+    }
+
+    @Override
+    public void reduce(BytesWritable joinKeyWritable, Iterable<BytesWritable> recordBytesWritable,
+        Context context) throws IOException, InterruptedException {
+      Map<String, List<GenericRecord>> joinInput = new HashMap<String, List<GenericRecord>>();
+      for (BytesWritable writable : recordBytesWritable) {
+
+        byte[] bytes = writable.copyBytes();
+        MapOutputValue mapOutputValue = MapOutputValue.fromBytes(bytes, schemaMap);
+        String schemaName = mapOutputValue.getSchemaName();
+        if (!joinInput.containsKey(schemaName)) {
+          joinInput.put(schemaName, new ArrayList<GenericRecord>());
+        }
+        joinInput.get(schemaName).add(mapOutputValue.getRecord());
+      }
+
+      int[] exists = new int[sourceNames.size()];
+      for (int i = 0; i < sourceNames.size(); i++) {
+        String source = sourceNames.get(i);
+        if (joinInput.containsKey(source)) {
+          exists[i] = 1;
+        } else {
+          exists[i] = 0;
+        }
+      }
+      String counterName = Arrays.toString(exists);
+      if (!countersMap.containsKey(counterName)) {
+        countersMap.put(counterName, new AtomicInteger(0));
+      }
+      countersMap.get(counterName).incrementAndGet();
+      // invoke the udf and pass in the join data
+      List<GenericRecord> outputRecords =
+          joinUDF.performJoin(new String(joinKeyWritable.copyBytes()), joinInput);
+      if (outputRecords != null) {
+        for (GenericRecord outputRecord : outputRecords) {
+          context.write(new AvroKey<GenericRecord>(outputRecord), NullWritable.get());
+        }
+      }
+    }
+
+    protected void cleanup(Context context) throws IOException, InterruptedException {
+      for (String counterName : countersMap.keySet()) {
+        context.getCounter("DynamicCounter", counterName)
+            .increment(countersMap.get(counterName).get());
+      }
+    }
+  }
+
+  public Job run() throws Exception {
+    Job job = Job.getInstance(getConf());
+    Configuration conf = job.getConfiguration();
+    job.setJobName(name);
+    job.setJarByClass(JoinPhaseJob.class);
+
+    FileSystem fs = FileSystem.get(conf);
+
+    String outputSchemaPath = getAndSetConfiguration(conf, JOIN_OUTPUT_SCHEMA);
+    Schema.Parser parser = new Schema.Parser();
+    Schema outputSchema = parser.parse(fs.open(new Path(outputSchemaPath)));
+    LOGGER.info("{}", outputSchema);
+
+    // Set custom config like adding distributed caches
+    String joinConfigUDFClass = getAndSetConfiguration(conf, JoinPhaseConstants.JOIN_CONFIG_UDF_CLASS);
+    LOGGER.info("Initializing JoinConfigUDFClass:{} with params:{}", joinConfigUDFClass);
+    Constructor<?> constructor = Class.forName(joinConfigUDFClass).getConstructor();
+    JoinConfigUDF joinConfigUDF = (JoinConfigUDF) constructor.newInstance();
+    joinConfigUDF.setJoinConfig(job);
+    getAndSetConfiguration(conf, JOIN_KEY_EXTRACTOR_CLASS);
+    getAndSetConfiguration(conf, JOIN_UDF_CLASS);
+
+    List<String> sourceNames = Lists.newArrayList(
+        getAndSetConfiguration(conf, JoinPhaseConstants.JOIN_SOURCE_NAMES).split(","));
+
+    // Map config
+    job.setMapperClass(GenericJoinMapper.class);
+    // AvroJob.setInputKeySchema(job, unionSchema);
+    job.setInputFormatClass(DelegatingAvroKeyInputFormat.class);
+    job.setMapOutputKeyClass(BytesWritable.class);
+    job.setMapOutputValueClass(BytesWritable.class);
+
+    // Reduce config
+    job.setReducerClass(GenericJoinReducer.class);
+    AvroJob.setOutputKeySchema(job, outputSchema);
+    job.setOutputFormatClass(AvroKeyOutputFormat.class);
+    job.setOutputKeyClass(AvroKey.class);
+    job.setOutputValueClass(NullWritable.class);
+
+    String numReducers = props.getProperty("num.reducers");
+    if (numReducers != null) {
+      job.setNumReduceTasks(Integer.parseInt(numReducers));
+    } else {
+      job.setNumReduceTasks(10);
+    }
+    LOGGER.info("Setting number of reducers : " + job.getNumReduceTasks());
+    Map<String, String> schemaMap = new HashMap<String, String>();
+    Map<String, String> schemaPathMapping = new HashMap<String, String>();
+
+    for (String sourceName : sourceNames) {
+      // load schema for each source
+      LOGGER.info("Loading Schema for {}", sourceName);
+
+      FSDataInputStream schemaStream =
+          fs.open(new Path(getAndCheck(sourceName + "." + JOIN_INPUT_SCHEMA.toString())));
+      Schema schema = new Schema.Parser().parse(schemaStream);
+      schemaMap.put(sourceName, schema.toString());
+      LOGGER.info("Schema for {}:  \n{}", sourceName, schema);
+
+      // configure input data for each source
+      String inputPathDir = getAndCheck(sourceName + "." + JOIN_INPUT_PATH.toString());
+      LOGGER.info("Input path dir for " + sourceName + ": " + inputPathDir);
+      for (String inputPath : inputPathDir.split(",")) {
+        Path input = new Path(inputPath);
+        FileStatus[] listFiles = fs.listStatus(input);
+        boolean isNested = false;
+        for (FileStatus fileStatus : listFiles) {
+          if (fileStatus.isDirectory()) {
+            isNested = true;
+            Path path = fileStatus.getPath();
+            LOGGER.info("Adding input:" + path);
+            FileInputFormat.addInputPath(job, path);
+            schemaPathMapping.put(path.toString(), sourceName);
+          }
+        }
+        if (!isNested) {
+          LOGGER.info("Adding input:" + inputPath);
+          FileInputFormat.addInputPath(job, input);
+          schemaPathMapping.put(input.toString(), sourceName);
+        }
+      }
+    }
+    StringWriter temp = new StringWriter();
+    OBJECT_MAPPER.writeValue(temp, schemaPathMapping);
+    job.getConfiguration().set("schema.path.mapping", temp.toString());
+
+    temp = new StringWriter();
+    OBJECT_MAPPER.writeValue(temp, schemaMap);
+    job.getConfiguration().set("schema.json.mapping", temp.toString());
+
+    Path outputPath = new Path(getAndCheck(JOIN_OUTPUT_PATH.toString()));
+    if (fs.exists(outputPath)) {
+      fs.delete(outputPath, true);
+    }
+    FileOutputFormat.setOutputPath(job, new Path(getAndCheck(JOIN_OUTPUT_PATH.toString())));
+
+    for (Object key : props.keySet()) {
+      conf.set(key.toString(), props.getProperty(key.toString()));
+    }
+
+    job.waitForCompletion(true);
+
+    dumpSummary(job, sourceNames);
+
+    return job;
+  }
+
+  private void dumpSummary(Job job, List<String> sourceNames) throws IOException {
+    System.out.println("Join Input Matrix.");
+    CounterGroup group = job.getCounters().getGroup("DynamicCounter");
+    for (String source : sourceNames) {
+      System.out.print(String.format("%25s\t", source));
+    }
+    if (group != null) {
+      Iterator<Counter> iterator = group.iterator();
+      while (iterator.hasNext()) {
+        Counter counter = iterator.next();
+        String displayName = counter.getDisplayName();
+        String[] split = displayName.replace("[", "").replace("]", "").split(",");
+        for (String str : split) {
+          if (str.trim().equals("1")) {
+            System.out.print(String.format("%25s\t", "1"));
+          } else {
+            System.out.print(String.format("%25s\t", "-"));
+          }
+        }
+      }
+    }
+  }
+
+  private String getAndSetConfiguration(Configuration configuration,
+      JoinPhaseConstants constant) {
+    String value = getAndCheck(constant.toString());
+    configuration.set(constant.toString(), value);
+    return value;
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("usage: config.properties");
+    }
+
+    Properties props = new Properties();
+    props.load(new FileInputStream(args[0]));
+
+    JoinPhaseJob job = new JoinPhaseJob("aggregate_avro_job", props);
+    job.run();
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinUDF.java
new file mode 100644
index 0000000000..852f2a6ccd
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/JoinUDF.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+
+public interface JoinUDF {
+  /**
+   * Initializes the UDF by providing the output schema.
+   * @param outputSchema Avro schema of the joined output records
+   */
+  void init(Schema outputSchema);
+
+  /**
+   * @param joinKeyVal common key value used to join all the sources
+   * @param joinInput mapping from source name to the GenericRecord(s) sharing that join key
+   * @return the joined output record(s) to emit for this join key
+   */
+  List<GenericRecord> performJoin(Object joinKeyVal, Map<String, List<GenericRecord>> joinInput);
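+
+  // Illustrative note: implementations are loaded reflectively by JoinPhaseJob and must expose a
+  // public constructor taking a Map<String, String> of per-source config values; GenericJoinUDF in
+  // this package is the bundled implementation.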
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/MapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/MapOutputKey.java
new file mode 100644
index 0000000000..3c1b34fa59
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/MapOutputKey.java
@@ -0,0 +1,21 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+public class MapOutputKey {
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/MapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/MapOutputValue.java
new file mode 100644
index 0000000000..c928743a0e
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/join/MapOutputValue.java
@@ -0,0 +1,103 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.join;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
+
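+/**
+ * Shuffle value used by the join job: a (schema/source name, GenericRecord) pair that is
+ * serialized to bytes in the mapper and rebuilt in the reducer via {@link #fromBytes}.
+ */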
+public class MapOutputValue {
+
+  private static BinaryDecoder binaryDecoder;
+  private String schemaName;
+  private GenericRecord record;
+  private GenericDatumWriter<GenericRecord> WRITER;
+  private EncoderFactory factory = EncoderFactory.get();
+
+  private BinaryEncoder binaryEncoder;
+
+  public MapOutputValue(String schemaName, GenericRecord record) {
+    this.schemaName = schemaName;
+    this.record = record;
+  }
+
+  public String getSchemaName() {
+    return schemaName;
+  }
+
+  public GenericRecord getRecord() {
+    return record;
+  }
+
+  public byte[] toBytes() throws IOException {
+    ByteArrayOutputStream dataStream = new ByteArrayOutputStream();
+    Schema schema = record.getSchema();
+    if (WRITER == null) {
+      WRITER = new GenericDatumWriter<GenericRecord>(schema);
+    }
+    binaryEncoder = factory.directBinaryEncoder(dataStream, binaryEncoder);
+    WRITER.write(record, binaryEncoder);
+
+    // serialize to bytes, we also need to know the schema name when we
+    // process this record on the reducer since reducer gets the record from
+    // multiple mappers. So we first write the schema/source name and then
+    // write the serialized bytes
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(out);
+    dos.writeInt(schema.getName().getBytes().length);
+    dos.write(schema.getName().getBytes());
+    byte[] dataBytes = dataStream.toByteArray();
+
+    dos.writeInt(dataBytes.length);
+    dos.write(dataBytes);
+    return out.toByteArray();
+  }
+
+  public static MapOutputValue fromBytes(byte[] bytes, Map<String, Schema> schemaMap)
+      throws IOException {
+    DataInputStream dataInputStream = new DataInputStream(new ByteArrayInputStream(bytes));
+    int length = dataInputStream.readInt();
+    byte[] sourceNameBytes = new byte[length];
+    dataInputStream.readFully(sourceNameBytes);
+    String schemaName = new String(sourceNameBytes);
+
+    int recordDataLength = dataInputStream.readInt();
+
+    byte[] recordBytes = new byte[recordDataLength];
+    dataInputStream.readFully(recordBytes);
+    Schema schema = schemaMap.get(schemaName);
+    GenericRecord record = new GenericData.Record(schema);
+    binaryDecoder = DecoderFactory.get().binaryDecoder(recordBytes, binaryDecoder);
+    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
+    gdr.read(record, binaryDecoder);
+    return new MapOutputValue(schemaName, record);
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/DefaultSegmentPushUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/DefaultSegmentPushUDF.java
new file mode 100644
index 0000000000..03ee08a0eb
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/DefaultSegmentPushUDF.java
@@ -0,0 +1,33 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.push;
+
+import java.util.Properties;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DefaultSegmentPushUDF implements SegmentPushUDF {
+  private static final Logger LOG = LoggerFactory.getLogger(DefaultSegmentPushUDF.class);
+
+  @Override
+  public void emitCustomEvents(Properties properties) {
+    // do nothing
+    LOG.info("Default segment push udf");
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java
new file mode 100644
index 0000000000..df99b4e5a1
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/DeleteOverlappingSegmentsInPinot.java
@@ -0,0 +1,145 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.push;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.helix.AccessOption;
+import org.apache.helix.BaseDataAccessor;
+import org.apache.helix.HelixDataAccessor;
+import org.apache.helix.PropertyKey;
+import org.apache.helix.PropertyKey.Builder;
+import org.apache.helix.ZNRecord;
+import org.apache.helix.manager.zk.ZKHelixDataAccessor;
+import org.apache.helix.manager.zk.ZNRecordSerializer;
+import org.apache.helix.manager.zk.ZkBaseDataAccessor;
+import org.apache.helix.manager.zk.ZkClient;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.IdealState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DeleteOverlappingSegmentsInPinot {
+
+  private static final Logger LOG = LoggerFactory.getLogger(DeleteOverlappingSegmentsInPinot.class);
+
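+  // Illustrative invocation (values are hypothetical): args are the ZooKeeper URL, the Helix
+  // cluster name and the Pinot table name, e.g.
+  //   localhost:2181 PinotCluster thirdeyeMetrics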
+  public static void main(String[] args) throws Exception {
+    String zkUrl = args[0];
+    String zkCluster = args[1];
+    String tableName = args[2];
+    deleteOverlappingSegments(zkUrl, zkCluster, tableName);
+  }
+
+  private static IdealState computeNewIdealStateAfterDeletingOverlappingSegments(HelixDataAccessor helixDataAccessor, PropertyKey idealStatesKey) {
+    IdealState is = helixDataAccessor.getProperty(idealStatesKey);
+    // compute existing DAILY segments
+    Set<String> daysWithDailySegments = new HashSet<>();
+    for (String segmentName : is.getPartitionSet()) {
+      LOG.info("Segment Name : {}", segmentName);
+      if (segmentName.indexOf("DAILY") > -1) {
+        String[] splits = segmentName.split("_");
+        String endDay = splits[splits.length - 2].substring(0, "yyyy-mm-dd".length());
+        String startDay = splits[splits.length - 3].substring(0, "yyyy-mm-dd".length());
+        LOG.info("Start : {} End : {}", startDay, endDay);
+        daysWithDailySegments.add(startDay);
+      }
+    }
+    // compute list of HOURLY segments to be deleted
+    Set<String> hourlySegmentsToDelete = new TreeSet<>();
+    for (String segmentName : is.getPartitionSet()) {
+      LOG.info("Segment name {}", segmentName);
+      if (segmentName.indexOf("HOURLY") > -1) {
+        String[] splits = segmentName.split("_");
+        String endDay = splits[splits.length - 2].substring(0, "yyyy-mm-dd".length());
+        String startDay = splits[splits.length - 3].substring(0, "yyyy-mm-dd".length());
+        LOG.info("Start : {} End : {}", startDay, endDay);
+        if (daysWithDailySegments.contains(startDay)) {
+          hourlySegmentsToDelete.add(segmentName);
+        }
+      }
+    }
+    LOG.info("HOURLY segments that can be deleted: {}", hourlySegmentsToDelete.size());
+    LOG.info("Hourly segments to delete {}", hourlySegmentsToDelete.toString().replaceAll(",", "\n"));
+    IdealState newIdealState = new IdealState(is.getRecord());
+    for (String hourlySegmentToDelete : hourlySegmentsToDelete) {
+      newIdealState.getRecord().getMapFields().remove(hourlySegmentToDelete);
+    }
+    return newIdealState;
+  }
+
+  public static boolean deleteOverlappingSegments(String zkUrl, String zkCluster, String tableName) {
+    boolean updateSuccessful = false;
+
+    if (!tableName.endsWith("_OFFLINE")) {
+      tableName = tableName + "_OFFLINE";
+    }
+
+    ZkClient zkClient = new ZkClient(zkUrl);
+    ZNRecordSerializer zkSerializer = new ZNRecordSerializer();
+    zkClient.setZkSerializer(zkSerializer);
+    BaseDataAccessor<ZNRecord> baseDataAccessor = new ZkBaseDataAccessor<>(zkClient);
+    HelixDataAccessor helixDataAccessor = new ZKHelixDataAccessor(zkCluster, baseDataAccessor);
+    Builder keyBuilder = helixDataAccessor.keyBuilder();
+    PropertyKey idealStateKey = keyBuilder.idealStates(tableName);
+    PropertyKey externalViewKey = keyBuilder.externalView(tableName);
+    IdealState currentIdealState = helixDataAccessor.getProperty(idealStateKey);
+    byte[] serializeIS = zkSerializer.serialize(currentIdealState.getRecord());
+    String name = tableName + ".idealstate." + System.currentTimeMillis();
+    File outputFile = new File("/tmp", name);
+
+    try (FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) {
+      IOUtils.write(serializeIS, fileOutputStream);
+    } catch (IOException e) {
+      LOG.error("Exception in delete overlapping segments", e);
+      return updateSuccessful;
+    }
+    LOG.info("Saved current idealstate to {}", outputFile);
+    IdealState newIdealState;
+    do {
+      newIdealState = computeNewIdealStateAfterDeletingOverlappingSegments(helixDataAccessor, idealStateKey);
+      LOG.info("Updating IdealState");
+      updateSuccessful = helixDataAccessor.getBaseDataAccessor().set(idealStateKey.getPath(), newIdealState.getRecord(), newIdealState.getRecord().getVersion(), AccessOption.PERSISTENT);
+      if (updateSuccessful) {
+        int numSegmentsDeleted = currentIdealState.getPartitionSet().size() - newIdealState.getPartitionSet().size();
+        LOG.info("Successfully updated IdealState: Removed segments: {}", (numSegmentsDeleted));
+      }
+    } while (!updateSuccessful);
+
+    try {
+      while (true) {
+        Thread.sleep(10000);
+        ExternalView externalView = helixDataAccessor.getProperty(externalViewKey);
+        IdealState idealState = helixDataAccessor.getProperty(idealStateKey);
+        Set<String> evPartitionSet = externalView.getPartitionSet();
+        Set<String> isPartitionSet = idealState.getPartitionSet();
+        if (evPartitionSet.equals(isPartitionSet)) {
+          LOG.info("Table {} has reached stable state. i.e segments in external view match idealstates", tableName);
+          break;
+        }
+      }
+    } catch (InterruptedException e) {
+      e.printStackTrace();
+    }
+    return updateSuccessful;
+  }
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushControllerAPIs.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushControllerAPIs.java
new file mode 100644
index 0000000000..fbf93738e7
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushControllerAPIs.java
@@ -0,0 +1,225 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.push;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpDelete;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.util.EntityUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Joiner;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+
+/**
+ * Contains APIs which can be used for segment operations
+ * such as listing, deleting overlap
+ */
+public class SegmentPushControllerAPIs {
+
+  private static Logger LOGGER = LoggerFactory.getLogger(SegmentPushControllerAPIs.class);
+  private String[] controllerHosts;
+  private int controllerPort;
+  private HttpHost controllerHttpHost;
+
+  private static final String OFFLINE_SEGMENTS = "OFFLINE";
+  private static String DAILY_SCHEDULE = "DAILY";
+  private static String HOURLY_SCHEDULE = "HOURLY";
+  private static String SEGMENTS_ENDPOINT = "/segments/";
+  private static String TABLES_ENDPOINT = "/tables/";
+  private static String TYPE_PARAMETER = "?type=offline";
+  private static String UTF_8 = "UTF-8";
+  private static long TIMEOUT = 120000;
+  private static String DATE_JOINER = "-";
+
+  SegmentPushControllerAPIs(String[] controllerHosts, String controllerPort) {
+    this.controllerHosts = controllerHosts;
+    this.controllerPort = Integer.valueOf(controllerPort);
+  }
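+
+  // Illustrative usage (host, port and names are hypothetical):
+  //   SegmentPushControllerAPIs apis = new SegmentPushControllerAPIs(new String[] {"localhost"}, "9000");
+  //   apis.deleteOverlappingSegments("myTable_OFFLINE", "myTable_DAILY_2018-01-01_2018-01-02");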
+
+  public void deleteOverlappingSegments(String tableName, String segmentName) throws IOException {
+    if (segmentName.contains(DAILY_SCHEDULE)) {
+      for (String controllerHost : controllerHosts) {
+        controllerHttpHost = new HttpHost(controllerHost, controllerPort);
+
+        LOGGER.info("Getting overlapped segments for {}*************", segmentName);
+        List<String> overlappingSegments = getOverlappingSegments(tableName, segmentName);
+
+        if (overlappingSegments.isEmpty()) {
+          LOGGER.info("No overlapping segments found");
+        } else {
+          LOGGER.info("Deleting overlapped segments****************");
+          deleteOverlappingSegments(tableName, overlappingSegments);
+        }
+      }
+    } else {
+      LOGGER.info("No overlapping segments to delete for HOURLY");
+    }
+  }
+
+  private List<String> getOverlappingSegments(String tablename, String segmentName) throws IOException {
+
+    List<String> overlappingSegments = new ArrayList<>();
+    String pattern = getOverlapPattern(segmentName, tablename);
+    if (pattern != null) {
+      LOGGER.info("Finding segments overlapping to {} with pattern {}", segmentName, pattern);
+      List<String> allSegments = getAllSegments(tablename, segmentName);
+      overlappingSegments = getOverlappingSegments(allSegments, pattern);
+    }
+    return overlappingSegments;
+  }
+
+  public List<String> getOverlappingSegments(List<String> allSegments, String pattern) {
+    List<String> overlappingSegments = new ArrayList<>();
+    for (String segment : allSegments) {
+      if (segment.startsWith(pattern)) {
+        LOGGER.info("Found overlapping segment {}", segment);
+        overlappingSegments.add(segment);
+      }
+    }
+    return overlappingSegments;
+  }
+
+  public String getOverlapPattern(String segmentName, String tablename) {
+    String pattern = null;
+    // segment name format: table[_*]Name_schedule_startDate_endDate
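+    // Worked example (hypothetical names, assuming SEGMENT_JOINER is "_"): for table "myTable"
+    // and segment "myTable_DAILY_2018-01-01_2018-01-02", the start date token is "2018-01-01",
+    // the date prefix becomes "2018-01", and the overlap pattern is "myTable_HOURLY_2018-01".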
+    String[] tokens = segmentName.split(ThirdEyeConstants.SEGMENT_JOINER);
+    int size = tokens.length;
+    if (size > 3) {
+      String startDateToken = tokens[size - 2];
+      if (startDateToken.lastIndexOf(DATE_JOINER) != -1) {
+        String datePrefix = startDateToken.substring(0, startDateToken.lastIndexOf(DATE_JOINER));
+        pattern = Joiner.on(ThirdEyeConstants.SEGMENT_JOINER).join(tablename, HOURLY_SCHEDULE, datePrefix);
+      }
+    }
+    return pattern;
+  }
+
+  private List<String> getAllSegments(String tablename, String segmentName) throws IOException {
+    List<String> allSegments = new ArrayList<>();
+
+    HttpClient controllerClient = new DefaultHttpClient();
+    HttpGet req = new HttpGet(SEGMENTS_ENDPOINT + URLEncoder.encode(tablename, UTF_8));
+    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
+    try {
+      if (res.getStatusLine().getStatusCode() != 200) {
+        throw new IllegalStateException(res.getStatusLine().toString());
+      }
+      InputStream content = res.getEntity().getContent();
+      JsonNode segmentsData = new ObjectMapper().readTree(content);
+
+      if (segmentsData != null) {
+        JsonNode offlineSegments = segmentsData.get(0).get(OFFLINE_SEGMENTS);
+        if (offlineSegments != null) {
+          for (JsonNode segment : offlineSegments) {
+            allSegments.add(segment.asText());
+          }
+        }
+      }
+      LOGGER.info("All segments : {}", allSegments);
+    } finally {
+      if (res.getEntity() != null) {
+        EntityUtils.consume(res.getEntity());
+      }
+    }
+    return allSegments;
+  }
+
+  private boolean isDeleteSuccessful(String tablename, String segmentName) throws IOException {
+
+    boolean deleteSuccessful = false;
+    HttpClient controllerClient = new DefaultHttpClient();
+    // this endpoint gets from ideal state
+    HttpGet req = new HttpGet(TABLES_ENDPOINT + URLEncoder.encode(tablename, UTF_8) + SEGMENTS_ENDPOINT);
+    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
+    try {
+      if (res.getStatusLine().getStatusCode() != 200) {
+        throw new IllegalStateException(res.getStatusLine().toString());
+      }
+      InputStream content = res.getEntity().getContent();
+      String response = IOUtils.toString(content);
+      LOGGER.info("All segments from ideal state {}", response);
+      String decoratedSegmentName = "\\\""+segmentName+"\\\"";
+      LOGGER.info("Decorated segment name {}", decoratedSegmentName);
+      if (!response.contains(decoratedSegmentName)) {
+        deleteSuccessful = true;
+        LOGGER.info("Delete successful");
+      } else {
+        LOGGER.info("Delete failed");
+      }
+    } finally {
+      if (res.getEntity() != null) {
+        EntityUtils.consume(res.getEntity());
+      }
+
+    }
+    return deleteSuccessful;
+
+  }
+
+
+  private void deleteOverlappingSegments(String tablename, List<String> overlappingSegments) throws IOException {
+
+    for (String segment : overlappingSegments) {
+      boolean deleteSuccessful = false;
+      long elapsedTime = 0;
+      long startTimeMillis = System.currentTimeMillis();
+      while (elapsedTime < TIMEOUT && !deleteSuccessful) {
+        deleteSuccessful = deleteSegment(tablename, segment);
+        LOGGER.info("Response {} while deleting segment {} from table {}", deleteSuccessful, segment, tablename);
+        long currentTimeMillis = System.currentTimeMillis();
+        elapsedTime = currentTimeMillis - startTimeMillis;
+      }
+    }
+  }
+
+  private boolean deleteSegment(String tablename, String segmentName) throws IOException {
+    boolean deleteSuccessful = false;
+
+    HttpClient controllerClient = new DefaultHttpClient();
+    HttpDelete req = new HttpDelete(SEGMENTS_ENDPOINT + URLEncoder.encode(tablename, UTF_8) + "/"
+        + URLEncoder.encode(segmentName, UTF_8)
+        + TYPE_PARAMETER);
+    HttpResponse res = controllerClient.execute(controllerHttpHost, req);
+    try {
+      if (res == null || res.getStatusLine() == null || res.getStatusLine().getStatusCode() != 200
+          || !isDeleteSuccessful(tablename, segmentName)) {
+        LOGGER.info("Exception in deleting segment, trying again {}", res);
+      } else {
+        deleteSuccessful = true;
+      }
+    } finally {
+      if (res.getEntity() != null) {
+        EntityUtils.consume(res.getEntity());
+      }
+    }
+    return deleteSuccessful;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushPhase.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushPhase.java
new file mode 100644
index 0000000000..e5554a8a2c
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushPhase.java
@@ -0,0 +1,178 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.push;
+
+import static org.apache.pinot.thirdeye.hadoop.push.SegmentPushPhaseConstants.*;
+
+import org.apache.pinot.common.utils.SimpleHttpResponse;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Constructor;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.pinot.common.utils.FileUploadDownloadClient;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfigProperties;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+
+/**
+ * This class pushes the pinot segments generated by SegmentCreation
+ * onto the pinot cluster
+ */
+public class SegmentPushPhase extends Configured {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(SegmentPushPhase.class);
+  private final String name;
+  private final Properties props;
+  private String[] hosts;
+  private String port;
+  private String tablename;
+  private boolean uploadSuccess = true;
+  private String segmentName = null;
+  private String segmentPushUDFClass;
+  SegmentPushControllerAPIs segmentPushControllerAPIs;
+
+
+  public SegmentPushPhase(String jobName, Properties properties) throws Exception {
+    super(new Configuration());
+    name = jobName;
+    props = properties;
+  }
+
+  public void run() throws Exception {
+    Configuration configuration = new Configuration();
+    FileSystem fs = FileSystem.get(configuration);
+
+    long startTime = System.currentTimeMillis();
+
+    String segmentPath = getAndSetConfiguration(configuration, SEGMENT_PUSH_INPUT_PATH);
+    LOGGER.info("Segment path : {}", segmentPath);
+    hosts = getAndSetConfiguration(configuration, SEGMENT_PUSH_CONTROLLER_HOSTS).split(ThirdEyeConstants.FIELD_SEPARATOR);
+    port = getAndSetConfiguration(configuration, SEGMENT_PUSH_CONTROLLER_PORT);
+    tablename = getAndCheck(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString());
+    segmentPushUDFClass = props.getProperty(SEGMENT_PUSH_UDF_CLASS.toString(), DefaultSegmentPushUDF.class.getCanonicalName());
+
+    Path path = new Path(segmentPath);
+    FileStatus[] fileStatusArr = fs.globStatus(path);
+    for (FileStatus fileStatus : fileStatusArr) {
+      if (fileStatus.isDirectory()) {
+        pushDir(fs, fileStatus.getPath());
+      } else {
+        pushOneTarFile(fs, fileStatus.getPath());
+      }
+    }
+    long endTime = System.currentTimeMillis();
+
+    if (uploadSuccess && segmentName != null) {
+      props.setProperty(SEGMENT_PUSH_START_TIME.toString(), String.valueOf(startTime));
+      props.setProperty(SEGMENT_PUSH_END_TIME.toString(), String.valueOf(endTime));
+
+      segmentPushControllerAPIs = new SegmentPushControllerAPIs(hosts, port);
+      LOGGER.info("Deleting segments overlapping to {} from table {}  ", segmentName, tablename);
+      segmentPushControllerAPIs.deleteOverlappingSegments(tablename, segmentName);
+
+      try {
+        LOGGER.info("Initializing SegmentPushUDFClass:{}", segmentPushUDFClass);
+        Constructor<?> constructor = Class.forName(segmentPushUDFClass).getConstructor();
+        SegmentPushUDF segmentPushUDF = (SegmentPushUDF) constructor.newInstance();
+        segmentPushUDF.emitCustomEvents(props);
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+    }
+
+  }
+
+  public void pushDir(FileSystem fs, Path path) throws Exception {
+    LOGGER.info("******** Now uploading segments tar from dir: {}", path);
+    FileStatus[] fileStatusArr = fs.listStatus(new Path(path.toString() + "/"));
+    for (FileStatus fileStatus : fileStatusArr) {
+      if (fileStatus.isDirectory()) {
+        pushDir(fs, fileStatus.getPath());
+      } else {
+        pushOneTarFile(fs, fileStatus.getPath());
+      }
+    }
+  }
+
+  public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
+    String fileName = path.getName();
+    if (!fileName.endsWith(".tar.gz")) {
+      return;
+    }
+    long length = fs.getFileStatus(path).getLen();
+    try (FileUploadDownloadClient fileUploadDownloadClient = new FileUploadDownloadClient()) {
+      for (String host : hosts) {
+        try (InputStream inputStream = fs.open(path)) {
+          fileName = fileName.split(".tar.gz")[0];
+          if (fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER) != -1) {
+            segmentName = fileName.substring(0, fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER));
+          }
+          LOGGER.info("******** Uploading file: {} to Host: {} and Port: {} *******", fileName, host, port);
+          SimpleHttpResponse simpleHttpResponse = fileUploadDownloadClient.uploadSegment(
+              FileUploadDownloadClient.getUploadSegmentHttpURI(host, Integer.parseInt(port)), fileName, inputStream);
+          int responseCode = simpleHttpResponse.getStatusCode();
+          LOGGER.info("Response code: {}", responseCode);
+          if (responseCode != 200) {
+            uploadSuccess = false;
+          }
+        } catch (Exception e) {
+          LOGGER.error("******** Error Uploading file: {} to Host: {} and Port: {}  *******", fileName, host, port);
+          LOGGER.error("Caught exception during upload", e);
+          throw new RuntimeException("Got Error during send tar files to push hosts!");
+        }
+      }
+    }
+  }
+
+
+  private String getAndSetConfiguration(Configuration configuration,
+      SegmentPushPhaseConstants constant) {
+    String value = getAndCheck(constant.toString());
+    configuration.set(constant.toString(), value);
+    return value;
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("usage: config.properties");
+    }
+
+    Properties props = new Properties();
+    props.load(new FileInputStream(args[0]));
+
+    SegmentPushPhase job = new SegmentPushPhase("segment_push_job", props);
+    job.run();
+  }
+
+
+}
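
As a usage sketch, the same job can be driven programmatically with the properties that run() reads. The keys come from SegmentPushPhaseConstants and ThirdEyeConfigProperties; the values below are placeholders, and the controller host list separator (ThirdEyeConstants.FIELD_SEPARATOR) is assumed here to be a comma:

    package org.apache.pinot.thirdeye.hadoop.push;

    import java.util.Properties;

    import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfigProperties;

    public class SegmentPushExample {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // keys from SegmentPushPhaseConstants; values are placeholders
        props.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_INPUT_PATH.toString(),
            "hdfs:///thirdeye/jobs/segment_creation/output/*");
        props.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_CONTROLLER_HOSTS.toString(),
            "controller-host-1,controller-host-2");
        props.setProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_CONTROLLER_PORT.toString(), "9000");
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "myCollection");

        new SegmentPushPhase("segment_push_job", props).run();
      }
    }
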
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushPhaseConstants.java
new file mode 100644
index 0000000000..c16e87e546
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushPhaseConstants.java
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.push;
+
+/**
+ * Class containing properties to be set for segment push
+ */
+public enum SegmentPushPhaseConstants {
+
+  SEGMENT_PUSH_INPUT_PATH("segment.push.input.path"),
+  SEGMENT_PUSH_UDF_CLASS("segment.push.udf.class"),
+  SEGMENT_PUSH_CONTROLLER_HOSTS("segment.push.controller.hosts"),
+  SEGMENT_PUSH_CONTROLLER_PORT("segment.push.controller.port"),
+  SEGMENT_PUSH_START_TIME("segment.push.start.time"),
+  SEGMENT_PUSH_END_TIME("segment.push.end.time");
+
+  String name;
+
+  SegmentPushPhaseConstants(String name) {
+    this.name = name;
+  }
+
+  public String toString() {
+    return name;
+  }
+
+}
\ No newline at end of file
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushUDF.java
new file mode 100644
index 0000000000..c01f066411
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/push/SegmentPushUDF.java
@@ -0,0 +1,25 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.push;
+
+import java.util.Properties;
+
+public interface SegmentPushUDF {
+
+  void emitCustomEvents(Properties properties);
+
+}
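
SegmentPushPhase loads the class named by the segment.push.udf.class property via reflection and calls emitCustomEvents after a successful push, so an implementation needs a public no-arg constructor. A hypothetical implementation (class name and log message are illustrative):

    package org.apache.pinot.thirdeye.hadoop.push;

    import java.util.Properties;

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    // Hypothetical implementation; wire it in with the segment.push.udf.class property.
    public class LoggingSegmentPushUDF implements SegmentPushUDF {

      private static final Logger LOGGER = LoggerFactory.getLogger(LoggingSegmentPushUDF.class);

      // SegmentPushPhase calls Class.forName(...).getConstructor().newInstance(),
      // so a public no-arg constructor is required.
      public LoggingSegmentPushUDF() {
      }

      @Override
      public void emitCustomEvents(Properties properties) {
        LOGGER.info("Segment push finished between {} and {}",
            properties.getProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_START_TIME.toString()),
            properties.getProperty(SegmentPushPhaseConstants.SEGMENT_PUSH_END_TIME.toString()));
      }
    }
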
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java
new file mode 100644
index 0000000000..e4e0105ea0
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseConstants.java
@@ -0,0 +1,41 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.segment.creation;
+
+/**
+ * Properties to be set for the segment creation phase
+ */
+public enum SegmentCreationPhaseConstants {
+
+  SEGMENT_CREATION_INPUT_PATH("segment.creation.input.path"),
+  SEGMENT_CREATION_OUTPUT_PATH("segment.creation.output.path"),
+  SEGMENT_CREATION_THIRDEYE_CONFIG("segment.creation.thirdeye.config"),
+  SEGMENT_CREATION_WALLCLOCK_START_TIME("segment.creation.wallclock.start.time"),
+  SEGMENT_CREATION_WALLCLOCK_END_TIME("segment.creation.wallclock.end.time"),
+  SEGMENT_CREATION_SCHEDULE("segment.creation.schedule"),
+  SEGMENT_CREATION_BACKFILL("segment.creation.backfill");
+
+  String name;
+
+  SegmentCreationPhaseConstants(String name) {
+    this.name = name;
+  }
+
+  public String toString() {
+    return name;
+  }
+
+}
\ No newline at end of file
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java
new file mode 100644
index 0000000000..2b68beebca
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseJob.java
@@ -0,0 +1,223 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.segment.creation;
+
+import static org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_INPUT_PATH;
+import static org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_OUTPUT_PATH;
+import static org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_SCHEDULE;
+import static org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_THIRDEYE_CONFIG;
+import static org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_END_TIME;
+import static org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_WALLCLOCK_START_TIME;
+import static org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.SEGMENT_CREATION_BACKFILL;
+
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobContext;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfigProperties;
+import org.apache.pinot.thirdeye.hadoop.util.ThirdeyeAvroUtils;
+
+/**
+ * This class contains the job that generates pinot segments with star tree index
+ */
+public class SegmentCreationPhaseJob extends Configured {
+
+  private static final String TEMP = "temp";
+  private static final String DEFAULT_BACKFILL = "false";
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(SegmentCreationPhaseJob.class);
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private final String name;
+  private final Properties props;
+
+
+  public SegmentCreationPhaseJob(String jobName, Properties properties) throws Exception {
+    super(new Configuration());
+    getConf().set("mapreduce.job.user.classpath.first", "true");
+    name = jobName;
+    props = properties;
+
+  }
+
+  public Job run() throws Exception {
+
+    Job job = Job.getInstance(getConf());
+
+    job.setJarByClass(SegmentCreationPhaseJob.class);
+    job.setJobName(name);
+
+    FileSystem fs = FileSystem.get(getConf());
+
+    Configuration configuration = job.getConfiguration();
+
+    String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
+    LOGGER.info("Input path : {}", inputSegmentDir);
+    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputSegmentDir);
+    LOGGER.info("Schema : {}", avroSchema);
+    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
+    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
+    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
+    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
+    String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
+    LOGGER.info("Output path : {}", outputDir);
+    Path stagingDir = new Path(outputDir, TEMP);
+    LOGGER.info("Staging dir : {}", stagingDir);
+    String segmentWallClockStart = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_START_TIME);
+    LOGGER.info("Segment wallclock start time : {}", segmentWallClockStart);
+    String segmentWallClockEnd = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_END_TIME);
+    LOGGER.info("Segment wallclock end time : {}", segmentWallClockEnd);
+    String schedule = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEDULE);
+    LOGGER.info("Segment schedule : {}", schedule);
+    String isBackfill = props.getProperty(SEGMENT_CREATION_BACKFILL.toString(), DEFAULT_BACKFILL);
+    configuration.set(SEGMENT_CREATION_BACKFILL.toString(), isBackfill);
+    LOGGER.info("Is Backfill : {}", configuration.get(SEGMENT_CREATION_BACKFILL.toString()));
+
+    // Create temporary directory
+    if (fs.exists(stagingDir)) {
+      LOGGER.warn("Found the temp folder, deleting it");
+      fs.delete(stagingDir, true);
+    }
+    fs.mkdirs(stagingDir);
+    fs.mkdirs(new Path(stagingDir + "/input/"));
+
+    // Create output directory
+    if (fs.exists(new Path(outputDir))) {
+      LOGGER.warn("Found the output folder deleting it");
+      fs.delete(new Path(outputDir), true);
+    }
+    fs.mkdirs(new Path(outputDir));
+
+    // Read input files
+    List<FileStatus> inputDataFiles = new ArrayList<>();
+    for (String input : inputSegmentDir.split(",")) {
+      Path inputPathPattern = new Path(input);
+      inputDataFiles.addAll(Arrays.asList(fs.listStatus(inputPathPattern)));
+    }
+    LOGGER.info("size {}", inputDataFiles.size());
+
+    try {
+      for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
+        FileStatus file = inputDataFiles.get(seqId);
+        String completeFilePath = " " + file.getPath().toString() + " " + seqId;
+        Path newOutPutFile = new Path((stagingDir + "/input/" + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
+        FSDataOutputStream stream = fs.create(newOutPutFile);
+        LOGGER.info("wrote {}", completeFilePath);
+        stream.writeUTF(completeFilePath);
+        stream.flush();
+        stream.close();
+      }
+    } catch (Exception e) {
+      LOGGER.error("Exception while reading input files ", e);
+    }
+
+    job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);
+
+    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
+      job.getConfiguration().set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
+    }
+
+    job.setInputFormatClass(TextInputFormat.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+
+    job.setMapOutputKeyClass(LongWritable.class);
+    job.setMapOutputValueClass(Text.class);
+
+    FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
+    FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));
+
+    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
+    job.getConfiguration().set(SEGMENT_CREATION_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
+
+    job.setMaxReduceAttempts(1);
+    job.setMaxMapAttempts(0);
+    job.setNumReduceTasks(0);
+    for (Object key : props.keySet()) {
+      job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
+    }
+
+    job.waitForCompletion(true);
+    if (!job.isSuccessful()) {
+      throw new RuntimeException("Job failed : " + job);
+    }
+
+    LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
+    FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
+    for (FileStatus segment : segmentArr) {
+      fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
+    }
+
+    // Delete temporary directory.
+    LOGGER.info("Cleanup the working directory.");
+    LOGGER.info("Deleting the dir: {}", stagingDir);
+    fs.delete(stagingDir, true);
+
+    return job;
+  }
+
+  private String getAndSetConfiguration(Configuration configuration,
+      SegmentCreationPhaseConstants constant) {
+    String value = getAndCheck(constant.toString());
+    configuration.set(constant.toString(), value);
+    return value;
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("usage: config.properties");
+    }
+
+    Properties props = new Properties();
+    props.load(new FileInputStream(args[0]));
+    SegmentCreationPhaseJob job = new SegmentCreationPhaseJob("segment_creation_job", props);
+    job.run();
+  }
+
+}
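
For reference, the staging files written by run() follow a small contract with the mapper: each file holds a single line of the form " <input path> <seqId>" (note the leading space), which the mapper splits on spaces into exactly three tokens. A standalone sketch of that round trip, with a made-up path:

    public class StagingLineSketch {
      public static void main(String[] args) {
        String filePath = "hdfs:///thirdeye/input/part-00000.avro"; // hypothetical input file
        int seqId = 0;

        String line = " " + filePath + " " + seqId;  // same construction as in run() above
        String[] tokens = line.split(" ");           // same parsing as in the mapper

        System.out.println(tokens.length);  // 3 (the leading space yields an empty first token)
        System.out.println(tokens[1]);      // hdfs:///thirdeye/input/part-00000.avro
        System.out.println(tokens[2]);      // 0
      }
    }
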
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java
new file mode 100644
index 0000000000..75feb6af88
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/segment/creation/SegmentCreationPhaseMapReduceJob.java
@@ -0,0 +1,320 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.segment.creation;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
+import com.google.common.base.Joiner;
+import org.apache.pinot.common.data.FieldSpec;
+import org.apache.pinot.common.data.Schema;
+import org.apache.pinot.common.data.StarTreeIndexSpec;
+import org.apache.pinot.common.data.TimeGranularitySpec.TimeFormat;
+import org.apache.pinot.common.utils.TarGzCompressionUtils;
+import org.apache.pinot.core.data.readers.FileFormat;
+import org.apache.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
+import org.apache.pinot.core.segment.creator.StatsCollectorConfig;
+import org.apache.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.core.segment.creator.impl.stats.LongColumnPreIndexStatsCollector;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+import org.apache.pinot.thirdeye.hadoop.util.ThirdeyePinotSchemaUtils;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.pinot.core.segment.creator.impl.V1Constants.MetadataKeys.Segment.*;
+import static org.apache.pinot.thirdeye.hadoop.segment.creation.SegmentCreationPhaseConstants.*;
+
+/**
+ * Mapper class for SegmentCreation job, which sets configs required for
+ * segment generation with star tree index
+ */
+public class SegmentCreationPhaseMapReduceJob {
+
+  public static class SegmentCreationMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
+    private static final Logger LOGGER = LoggerFactory.getLogger(SegmentCreationPhaseMapReduceJob.class);
+    private static ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory());
+
+    private Configuration properties;
+
+    private String inputFilePath;
+    private String outputPath;
+    private String tableName;
+
+    private Path currentHdfsWorkDir;
+    private String currentDiskWorkDir;
+
+    // Temporary HDFS path for local machine
+    private String localHdfsSegmentTarPath;
+
+    private String localDiskSegmentDirectory;
+    private String localDiskSegmentTarPath;
+
+    private ThirdEyeConfig thirdeyeConfig;
+    private Schema schema;
+
+    private Long segmentWallClockStartTime;
+    private Long segmentWallClockEndTime;
+    private String segmentSchedule;
+    private boolean isBackfill;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+
+      currentHdfsWorkDir = FileOutputFormat.getWorkOutputPath(context);
+      currentDiskWorkDir = "pinot_hadoop_tmp";
+
+      // Temporary HDFS path for local machine
+      localHdfsSegmentTarPath = currentHdfsWorkDir + "/segmentTar";
+
+      // Temporary DISK path for local machine
+      localDiskSegmentDirectory = currentDiskWorkDir + "/segments/";
+      localDiskSegmentTarPath = currentDiskWorkDir + "/segmentsTar/";
+      new File(localDiskSegmentTarPath).mkdirs();
+
+      LOGGER.info("*********************************************************************");
+      LOGGER.info("Configurations : {}", context.getConfiguration().toString());
+      LOGGER.info("*********************************************************************");
+      LOGGER.info("Current HDFS working dir : {}", currentHdfsWorkDir);
+      LOGGER.info("Current DISK working dir : {}", new File(currentDiskWorkDir).getAbsolutePath());
+      LOGGER.info("*********************************************************************");
+      properties = context.getConfiguration();
+
+      outputPath = properties.get(SEGMENT_CREATION_OUTPUT_PATH.toString());
+
+      thirdeyeConfig = OBJECT_MAPPER.readValue(properties.get(SEGMENT_CREATION_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
+      LOGGER.info(thirdeyeConfig.encode());
+      schema = ThirdeyePinotSchemaUtils.createSchema(thirdeyeConfig);
+      tableName = thirdeyeConfig.getCollection();
+
+      segmentWallClockStartTime = Long.valueOf(properties.get(SEGMENT_CREATION_WALLCLOCK_START_TIME.toString()));
+      segmentWallClockEndTime = Long.valueOf(properties.get(SEGMENT_CREATION_WALLCLOCK_END_TIME.toString()));
+      segmentSchedule = properties.get(SEGMENT_CREATION_SCHEDULE.toString());
+      isBackfill = Boolean.valueOf(properties.get(SEGMENT_CREATION_BACKFILL.toString()));
+    }
+
+    @Override
+    public void cleanup(Context context) throws IOException, InterruptedException {
+      FileUtils.deleteQuietly(new File(currentDiskWorkDir));
+    }
+
+    @Override
+    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+
+      String line = value.toString();
+      String[] lineSplits = line.split(" ");
+
+      LOGGER.info("*********************************************************************");
+      LOGGER.info("mapper input : {}", value);
+      LOGGER.info("Path to output : {}", outputPath);
+      LOGGER.info("Table name : {}", tableName);
+      LOGGER.info("num lines : {}", lineSplits.length);
+
+      for (String split : lineSplits) {
+        LOGGER.info("Command line : {}", split);
+      }
+      LOGGER.info("*********************************************************************");
+
+      if (lineSplits.length != 3) {
+        throw new RuntimeException("Input to the mapper is malformed, please contact the pinot team");
+      }
+      inputFilePath = lineSplits[1].trim();
+
+      LOGGER.info("*********************************************************************");
+      LOGGER.info("input data file path : {}", inputFilePath);
+      LOGGER.info("local hdfs segment tar path: {}", localHdfsSegmentTarPath);
+      LOGGER.info("local disk segment path: {}", localDiskSegmentDirectory);
+      LOGGER.info("*********************************************************************");
+
+      try {
+        createSegment(inputFilePath, schema, lineSplits[2]);
+        LOGGER.info("finished segment creation job successfully");
+      } catch (Exception e) {
+        LOGGER.error("Got exceptions during creating segments!", e);
+      }
+
+      context.write(new LongWritable(Long.parseLong(lineSplits[2])),
+          new Text(FileSystem.get(new Configuration()).listStatus(new Path(localHdfsSegmentTarPath + "/"))[0].getPath().getName()));
+      LOGGER.info("finished the job successfully");
+    }
+
+    private String createSegment(String dataFilePath, Schema schema, String seqId) throws Exception {
+      final FileSystem fs = FileSystem.get(new Configuration());
+      final Path hdfsDataPath = new Path(dataFilePath);
+      final File dataPath = new File(currentDiskWorkDir, "data");
+      if (dataPath.exists()) {
+        // use commons-io so a leftover non-empty directory is also removed
+        FileUtils.deleteQuietly(dataPath);
+      }
+      dataPath.mkdir();
+      final Path localFilePath = new Path(dataPath + "/" + hdfsDataPath.getName());
+      fs.copyToLocalFile(hdfsDataPath, localFilePath);
+
+      LOGGER.info("Data schema is : {}", schema);
+
+      // Set segment generator config
+      LOGGER.info("*********************************************************************");
+      SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(schema);
+      segmentGeneratorConfig.setTableName(tableName);
+      segmentGeneratorConfig.setInputFilePath(new File(dataPath, hdfsDataPath.getName()).getAbsolutePath());
+      LOGGER.info("Setting input path {}", segmentGeneratorConfig.getInputFilePath());
+      segmentGeneratorConfig.setFormat(FileFormat.AVRO);
+      segmentGeneratorConfig.setSegmentNamePostfix(seqId);
+      segmentGeneratorConfig.setOutDir(localDiskSegmentDirectory);
+      LOGGER.info("Setting enableStarTreeIndex");
+      String minTime = ThirdEyeConstants.DATE_TIME_FORMATTER.print(segmentWallClockStartTime);
+      String maxTime = ThirdEyeConstants.DATE_TIME_FORMATTER.print(segmentWallClockEndTime);
+      LOGGER.info("Wall clock time : min {} max {}", minTime, maxTime);
+      LOGGER.info("isBackfill : {}", isBackfill);
+      if (isBackfill) {
+        // in case of backfill, we have to ensure that the segment name is the same as the original segment name.
+        // The segment name is retained through the backfill and derived_column_transformation phases
+        // in the output files generated:
+        // backfill will generate original_segment_name.avro
+        // derived_column_transformation will generate original_segment_name-m-00000.avro etc.
+        String segmentName = hdfsDataPath.getName().split("-(m|r)-[0-9]{5}")[0];
+        segmentName = segmentName.split(ThirdEyeConstants.AVRO_SUFFIX)[0];
+        segmentGeneratorConfig.setSegmentName(segmentName);
+      } else {
+        String segmentName =
+            Joiner.on(ThirdEyeConstants.SEGMENT_JOINER).join(tableName, segmentSchedule, minTime, maxTime, seqId);
+        segmentGeneratorConfig.setSegmentName(segmentName);
+      }
+      LOGGER.info("Setting segment name {}", segmentGeneratorConfig.getSegmentName());
+
+
+      // Set star tree config
+      StarTreeIndexSpec starTreeIndexSpec = new StarTreeIndexSpec();
+
+      // _raw dimensions should not be in star tree split order
+      // if a dimension has a _topk column, we will include only
+      // the column with topk, and skip _raw column for materialization in star tree
+      Set<String> skipMaterializationForDimensions = new HashSet<>();
+      Set<String> transformDimensionsSet = thirdeyeConfig.getTransformDimensions();
+      LOGGER.info("Dimensions with _topk column {}", transformDimensionsSet);
+      for (String topkTransformDimension : transformDimensionsSet) {
+        skipMaterializationForDimensions.add(topkTransformDimension);
+        LOGGER.info("Adding {} to skipMaterialization set", topkTransformDimension);
+      }
+      starTreeIndexSpec.setSkipMaterializationForDimensions(skipMaterializationForDimensions);
+      LOGGER.info("Setting skipMaterializationForDimensions {}", skipMaterializationForDimensions);
+
+      if (thirdeyeConfig.getSplit() != null) {
+        starTreeIndexSpec.setMaxLeafRecords(thirdeyeConfig.getSplit().getThreshold());
+        LOGGER.info("Setting split threshold to {}", starTreeIndexSpec.getMaxLeafRecords());
+        List<String> splitOrder = thirdeyeConfig.getSplit().getOrder();
+        if (splitOrder != null) {
+          LOGGER.info("Removing from splitOrder, any dimensions which are also in skipMaterializationForDimensions");
+          splitOrder.removeAll(skipMaterializationForDimensions);
+          starTreeIndexSpec.setDimensionsSplitOrder(splitOrder);
+        }
+        LOGGER.info("Setting splitOrder {}", splitOrder);
+      }
+      segmentGeneratorConfig.enableStarTreeIndex(starTreeIndexSpec);
+      LOGGER.info("*********************************************************************");
+
+      // Set time for SIMPLE_DATE_FORMAT case
+      String sdfPrefix = TimeFormat.SIMPLE_DATE_FORMAT.toString() + ThirdEyeConstants.SDF_SEPARATOR;
+      if (thirdeyeConfig.getTime().getTimeFormat().startsWith(sdfPrefix)) {
+
+        String pattern = thirdeyeConfig.getTime().getTimeFormat().split(ThirdEyeConstants.SDF_SEPARATOR)[1];
+        DateTimeFormatter sdfFormatter = DateTimeFormat.forPattern(pattern);
+
+        File localAvroFile = new File(dataPath, hdfsDataPath.getName());
+        LongColumnPreIndexStatsCollector timeColumnStatisticsCollector =
+            getTimeColumnStatsCollector(schema, localAvroFile);
+        String startTime = timeColumnStatisticsCollector.getMinValue().toString();
+        String endTime = timeColumnStatisticsCollector.getMaxValue().toString();
+        startTime = String.valueOf(DateTime.parse(startTime, sdfFormatter).getMillis());
+        endTime = String.valueOf(DateTime.parse(endTime, sdfFormatter).getMillis());
+
+        // set start time
+        segmentGeneratorConfig.getCustomProperties().put(SEGMENT_START_TIME, startTime);
+        // set end time
+        segmentGeneratorConfig.getCustomProperties().put(SEGMENT_END_TIME, endTime);
+        // set time unit
+        segmentGeneratorConfig.setSegmentTimeUnit(TimeUnit.MILLISECONDS);
+      }
+
+      // Generate segment
+      SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
+      driver.init(segmentGeneratorConfig);
+      driver.build();
+
+      // Tar the segment directory into file.
+      String segmentName = null;
+      File localDiskSegmentDirectoryFile = new File(localDiskSegmentDirectory);
+      for (File file : localDiskSegmentDirectoryFile.listFiles()) {
+        segmentName = file.getName();
+        if (segmentName.startsWith(tableName)) {
+          break;
+        }
+      }
+      String localSegmentPath = new File(localDiskSegmentDirectory, segmentName).getAbsolutePath();
+
+      String localTarPath = localDiskSegmentTarPath + "/" + segmentName + ".tar.gz";
+      LOGGER.info("Trying to tar the segment to: {}", localTarPath);
+      TarGzCompressionUtils.createTarGzOfDirectory(localSegmentPath, localTarPath);
+      String hdfsTarPath = localHdfsSegmentTarPath + "/" + segmentName + ".tar.gz";
+
+      LOGGER.info("*********************************************************************");
+      LOGGER.info("Copy from : {} to {}", localTarPath, hdfsTarPath);
+      LOGGER.info("*********************************************************************");
+      fs.copyFromLocalFile(true, true, new Path(localTarPath), new Path(hdfsTarPath));
+      return segmentName;
+    }
+
+    private LongColumnPreIndexStatsCollector getTimeColumnStatsCollector(Schema schema, File localAvroFile)
+        throws FileNotFoundException, IOException {
+      String timeColumnName = schema.getTimeColumnName();
+      FieldSpec spec =  schema.getTimeFieldSpec();
+      LOGGER.info("Spec for " + timeColumnName + " is " + spec);
+      LongColumnPreIndexStatsCollector timeColumnStatisticsCollector = new LongColumnPreIndexStatsCollector(spec.getName(), new StatsCollectorConfig(schema, null));
+      LOGGER.info("StatsCollector :" + timeColumnStatisticsCollector);
+      DataFileStream<GenericRecord> dataStream =
+          new DataFileStream<GenericRecord>(new FileInputStream(localAvroFile), new GenericDatumReader<GenericRecord>());
+      while (dataStream.hasNext()) {
+        GenericRecord next = dataStream.next();
+        timeColumnStatisticsCollector.collect(next.get(timeColumnName));
+      }
+      dataStream.close();
+      timeColumnStatisticsCollector.seal();
+
+      return timeColumnStatisticsCollector;
+    }
+
+  }
+}
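
The backfill branch in createSegment() recovers the original segment name from the input file name by stripping the mapper/reducer part suffix and the avro extension. A standalone sketch of that string handling (file names are made up, and ThirdEyeConstants.AVRO_SUFFIX is assumed to be ".avro"):

    public class BackfillSegmentNameSketch {
      public static void main(String[] args) {
        // output of the derived_column_transformation phase
        String fromTransformation = "myCollection_HOURLY_100_200_0-m-00000.avro";
        // output of the backfill phase
        String fromBackfill = "myCollection_HOURLY_100_200_0.avro";

        System.out.println(toSegmentName(fromTransformation)); // myCollection_HOURLY_100_200_0
        System.out.println(toSegmentName(fromBackfill));       // myCollection_HOURLY_100_200_0
      }

      // mirrors the two splits used in createSegment() for the backfill case
      static String toSegmentName(String fileName) {
        String name = fileName.split("-(m|r)-[0-9]{5}")[0];
        return name.split("\\.avro")[0];   // "\\.avro" stands in for ThirdEyeConstants.AVRO_SUFFIX
      }
    }
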
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/DimensionValueMetricPair.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/DimensionValueMetricPair.java
new file mode 100644
index 0000000000..e3dc9347ba
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/DimensionValueMetricPair.java
@@ -0,0 +1,60 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.topk;
+
+/**
+ * Class to manage (dimension value, metric value) pairs.
+ * The ordering of elements is based on the metric value:
+ * a lesser metric value is treated as greater in the ordering,
+ * so that it gets removed from the fixed-size priority queue first
+ */
+public class DimensionValueMetricPair implements Comparable<DimensionValueMetricPair>{
+
+  private Object dimensionValue;
+  private Number metricValue;
+
+  public DimensionValueMetricPair(Object dimensionValue, Number metricValue) {
+    this.dimensionValue = dimensionValue;
+    this.metricValue = metricValue;
+  }
+
+  public Object getDimensionValue() {
+    return dimensionValue;
+  }
+  public void setDimensionValue(Object dimensionValue) {
+    this.dimensionValue = dimensionValue;
+  }
+  public Number getMetricValue() {
+    return metricValue;
+  }
+  public void setMetricValue(Number metricValue) {
+    this.metricValue = metricValue;
+  }
+
+
+  @Override
+  public int compareTo(DimensionValueMetricPair other) {
+    // reversed comparison on the metric value (a lesser metric compares as greater)
+    return Double.compare(other.metricValue.doubleValue(), this.metricValue.doubleValue());
+  }
+
+  @Override
+  public String toString() {
+    return "[" + dimensionValue + "=" + metricValue + "]";
+  }
+
+
+
+}
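
The reversed ordering is what lets a size-bounded priority queue keep the top k values: the pair with the smallest metric compares as the greatest element and is evicted first on overflow. A minimal sketch using Guava's MinMaxPriorityQueue (the bounded queue imported by TopKPhaseJob below); dimension values and metrics are made up:

    package org.apache.pinot.thirdeye.hadoop.topk;

    import com.google.common.collect.MinMaxPriorityQueue;

    public class TopKQueueSketch {
      public static void main(String[] args) {
        // keep only the top 2 dimension values by metric
        MinMaxPriorityQueue<DimensionValueMetricPair> queue = MinMaxPriorityQueue.maximumSize(2).create();

        queue.add(new DimensionValueMetricPair("us", 500));
        queue.add(new DimensionValueMetricPair("in", 300));
        queue.add(new DimensionValueMetricPair("fr", 10)); // smallest metric: compares as greatest, so it is evicted

        for (DimensionValueMetricPair pair : queue) {
          System.out.println(pair); // prints [us=500] and [in=300], in unspecified order
        }
      }
    }
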
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKDimensionValues.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKDimensionValues.java
new file mode 100644
index 0000000000..1f11f0370b
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKDimensionValues.java
@@ -0,0 +1,75 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.topk;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+/**
+ * Class to create and manage top k values for every dimension
+ */
+public class TopKDimensionValues {
+  private Map<String, Set<String>> topKDimensions;
+
+  public TopKDimensionValues() {
+    topKDimensions = new HashMap<>();
+  }
+
+  public Map<String, Set<String>> getTopKDimensions() {
+    return topKDimensions;
+  }
+
+  public void setTopKDimensions(Map<String, Set<String>> topKDimensions) {
+    this.topKDimensions = topKDimensions;
+  }
+
+  /**
+   * Add a top k value for a dimension
+   * @param dimension
+   * @param value
+   */
+  public void addValue(String dimension, String value) {
+    if (topKDimensions.get(dimension) == null) {
+      topKDimensions.put(dimension, new HashSet<String>());
+    }
+    topKDimensions.get(dimension).add(value);
+  }
+
+  public void addAllValues(String dimension, Set<String> values) {
+    if (topKDimensions.get(dimension) == null) {
+      topKDimensions.put(dimension, new HashSet<String>());
+    }
+    topKDimensions.get(dimension).addAll(values);
+  }
+
+  /**
+   * Add all top k values for all dimensions from a TopKDimensionValues object
+   * @param valuesFile
+   */
+  public void addMap(TopKDimensionValues valuesFile) {
+    Map<String, Set<String>> values = valuesFile.getTopKDimensions();
+    for (Entry<String, Set<String>> entry : values.entrySet()) {
+      if (topKDimensions.get(entry.getKey()) == null) {
+        topKDimensions.put(entry.getKey(), new HashSet<String>());
+      }
+      topKDimensions.get(entry.getKey()).addAll(entry.getValue());
+    }
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseConfig.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseConfig.java
new file mode 100644
index 0000000000..3fbe641f58
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseConfig.java
@@ -0,0 +1,159 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.topk;
+
+import org.apache.pinot.thirdeye.hadoop.config.DimensionSpec;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+import org.apache.pinot.thirdeye.hadoop.config.MetricSpec;
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+import org.apache.pinot.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
+import org.apache.pinot.thirdeye.hadoop.config.TopkWhitelistSpec;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This class contains the config needed by TopKPhase
+ * and the methods to obtain the config from the ThirdEyeConfig
+ */
+public class TopKPhaseConfig {
+  private List<String> dimensionNames;
+  private List<DimensionType> dimensionTypes;
+  private List<String> metricNames;
+  private List<MetricType> metricTypes;
+  private Map<String, Double> metricThresholds;
+  private Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec;
+  private Map<String, List<String>> whitelist;
+
+  private static final double DEFAULT_METRIC_THRESHOLD = 0.01;
+
+  public TopKPhaseConfig() {
+
+  }
+
+  /**
+   * @param dimensionNames
+   * @param dimensionTypes
+   * @param metricNames
+   * @param metricTypes
+   * @param metricThresholds
+   * @param whitelist
+   */
+  public TopKPhaseConfig(List<String> dimensionNames, List<DimensionType> dimensionTypes,
+      List<String> metricNames, List<MetricType> metricTypes,
+      Map<String, Double> metricThresholds, Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec,
+      Map<String, List<String>> whitelist) {
+    super();
+    this.dimensionNames = dimensionNames;
+    this.dimensionTypes = dimensionTypes;
+    this.metricNames = metricNames;
+    this.metricTypes = metricTypes;
+    this.metricThresholds = metricThresholds;
+    this.topKDimensionToMetricsSpec = topKDimensionToMetricsSpec;
+    this.whitelist = whitelist;
+  }
+
+  public List<String> getDimensionNames() {
+    return dimensionNames;
+  }
+
+  public List<DimensionType> getDimensionTypes() {
+    return dimensionTypes;
+  }
+
+  public List<String> getMetricNames() {
+    return metricNames;
+  }
+
+  public List<MetricType> getMetricTypes() {
+    return metricTypes;
+  }
+
+  public Map<String, Double> getMetricThresholds() {
+    return metricThresholds;
+  }
+
+  public Map<String, TopKDimensionToMetricsSpec> getTopKDimensionToMetricsSpec() {
+    return topKDimensionToMetricsSpec;
+  }
+
+  public Map<String, List<String>> getWhitelist() {
+    return whitelist;
+  }
+
+  /**
+   * This method generates necessary top k config for TopKPhase job from
+   * ThirdEye config
+   * @param config
+   * @return
+   */
+  public static TopKPhaseConfig fromThirdEyeConfig(ThirdEyeConfig config) {
+
+    //metrics
+    List<String> metricNames = new ArrayList<>(config.getMetrics().size());
+    List<MetricType> metricTypes = new ArrayList<>(config.getMetrics().size());
+    for (MetricSpec spec : config.getMetrics()) {
+      metricNames.add(spec.getName());
+      metricTypes.add(spec.getType());
+    }
+
+    // dimensions
+    List<String> dimensionNames = new ArrayList<>(config.getDimensions().size());
+    List<DimensionType> dimensionTypes = new ArrayList<>(config.getDimensions().size());
+    for (DimensionSpec spec : config.getDimensions()) {
+      dimensionNames.add(spec.getName());
+      dimensionTypes.add(spec.getDimensionType());
+    }
+
+    TopkWhitelistSpec topKWhitelist = config.getTopKWhitelist();
+    Map<String, Double> metricThresholds = new HashMap<>();
+    Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpec = new HashMap<>();
+    Map<String, List<String>> whitelist = new HashMap<>();
+
+    // topk
+    if (topKWhitelist != null) {
+      // metric thresholds
+      if (topKWhitelist.getThreshold() != null) {
+        metricThresholds = topKWhitelist.getThreshold();
+      }
+      for (String metric : metricNames) {
+        if (metricThresholds.get(metric) == null) {
+          metricThresholds.put(metric, DEFAULT_METRIC_THRESHOLD);
+        }
+      }
+
+      // topk
+      if (topKWhitelist.getTopKDimensionToMetricsSpec() != null) {
+        for (TopKDimensionToMetricsSpec topkSpec : topKWhitelist.getTopKDimensionToMetricsSpec()) {
+          topKDimensionToMetricsSpec.put(topkSpec.getDimensionName(), topkSpec);
+        }
+      }
+
+      // whitelist
+      if (topKWhitelist.getWhitelist() != null) {
+        whitelist.putAll(topKWhitelist.getWhitelist());
+      }
+    }
+
+    return new TopKPhaseConfig(dimensionNames, dimensionTypes, metricNames, metricTypes, metricThresholds,
+        topKDimensionToMetricsSpec, whitelist);
+  }
+
+
+}
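
As a concrete illustration of fromThirdEyeConfig() above: every metric that has no explicit threshold in the topk/whitelist spec falls back to DEFAULT_METRIC_THRESHOLD (0.01). Metric names below are made up:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class ThresholdDefaultingSketch {
      public static void main(String[] args) {
        List<String> metricNames = Arrays.asList("clicks", "impressions"); // hypothetical metrics
        Map<String, Double> metricThresholds = new HashMap<>();
        metricThresholds.put("clicks", 0.05); // only "clicks" is configured explicitly

        // same defaulting loop as in fromThirdEyeConfig()
        for (String metric : metricNames) {
          if (metricThresholds.get(metric) == null) {
            metricThresholds.put(metric, 0.01); // DEFAULT_METRIC_THRESHOLD
          }
        }
        System.out.println(metricThresholds.get("impressions")); // 0.01
      }
    }
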
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseConstants.java
new file mode 100644
index 0000000000..dd815ef18d
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseConstants.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.topk;
+
+/**
+ * This class contains the properties to be set for topk phase
+ */
+public enum TopKPhaseConstants {
+  TOPK_PHASE_INPUT_PATH("topk.phase.input.path"),
+  TOPK_PHASE_OUTPUT_PATH("topk.phase.output.path"),
+  TOPK_PHASE_THIRDEYE_CONFIG("topk.rollup.phase.thirdeye.config");
+
+  String name;
+
+  TopKPhaseConstants(String name) {
+    this.name = name;
+  }
+
+  public String toString() {
+    return name;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseJob.java
new file mode 100644
index 0000000000..8358aa0fc2
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseJob.java
@@ -0,0 +1,463 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.topk;
+
+import static org.apache.pinot.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_INPUT_PATH;
+import static org.apache.pinot.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_OUTPUT_PATH;
+import static org.apache.pinot.thirdeye.hadoop.topk.TopKPhaseConstants.TOPK_PHASE_THIRDEYE_CONFIG;
+
+import java.io.DataOutput;
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.commons.collections.MapUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.MinMaxPriorityQueue;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfigProperties;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+import org.apache.pinot.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
+import org.apache.pinot.thirdeye.hadoop.util.ThirdeyeAggregateMetricUtils;
+import org.apache.pinot.thirdeye.hadoop.util.ThirdeyeAvroUtils;
+
+/**
+ * This phase reads avro input, and produces a file with top k values for dimensions
+ *
+ * Map:
+ * Map phase reads avro records, and for each record emits
+ * Key=(Dimension name, Dimension Value) Value=(Metrics)
+ * For each record, map also emits a
+ * Key=(ALL, ALL) Value=(Metrics)
+ * This is used for computing the metric sums in the reduce phase
+ *
+ * Combine:
+ * Combine phase receives Key=(DimensionName, DimensionValue)
+ * from each map, and aggregates the metric values. This phase
+ * helps reduce the traffic sent to the reducer
+ *
+ * Reduce:
+ * We strictly use just 1 reducer.
+ * Reduce phase receives Key=(DimensionName, DimensionValue)
+ * and aggregates the metric values
+ * The very first key received is (ALL, ALL), which lets us compute the total metric sum
+ * These metric sums are used to check metric thresholds of other
+ * (dimensionName, dimensionValue) pairs. If none of the metric
+ * thresholds pass, the pair is discarded.
+ * In the cleanup, top k dimension values are picked for each dimension
+ * based on the metric value
+ * The top k dimension values for each dimension are written to a file
+ *
+ */
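
To make the description above concrete: the reduce phase compares each (dimension name, dimension value) metric sum against the totals carried by the (ALL, ALL) key. A rough, illustrative sketch of such a threshold check with made-up numbers; the exact formula is an assumption here and lives in the reducer code:

    public class ThresholdCheckSketch {
      public static void main(String[] args) {
        double totalClicks = 1_000_000d;   // metric sum from the (ALL, ALL) key
        double clicksForValue = 4_000d;    // metric sum for one (dimension name, dimension value) pair
        double threshold = 0.01;           // e.g. DEFAULT_METRIC_THRESHOLD from TopKPhaseConfig

        // illustrative check: keep the pair only if it contributes at least the threshold fraction
        boolean passes = clicksForValue / totalClicks >= threshold;
        System.out.println(passes);        // false: this pair is discarded unless another metric passes
      }
    }
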
+public class TopKPhaseJob extends Configured {
+  private static final Logger LOGGER = LoggerFactory.getLogger(TopKPhaseJob.class);
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private static final String TOPK_ALL_DIMENSION_NAME = "0";
+  private static final String TOPK_ALL_DIMENSION_VALUE = "0";
+
+  private String name;
+  private Properties props;
+
+  /**
+   * @param name
+   * @param props
+   */
+  public TopKPhaseJob(String name, Properties props) {
+    super(new Configuration());
+    this.name = name;
+    this.props = props;
+  }
+
+  public static class TopKPhaseMapper
+      extends Mapper<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> {
+
+    private TopKPhaseConfig config;
+    ThirdEyeConfig thirdeyeConfig;
+    private List<String> dimensionNames;
+    private List<DimensionType> dimensionTypes;
+    private List<String> metricNames;
+    private List<MetricType> metricTypes;
+    private int numMetrics;
+    BytesWritable keyWritable;
+    BytesWritable valWritable;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+      LOGGER.info("TopKPhaseJob.TopKPhaseMapper.setup()");
+      Configuration configuration = context.getConfiguration();
+      try {
+        thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
+        config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
+        dimensionNames = config.getDimensionNames();
+        dimensionTypes = config.getDimensionTypes();
+        metricNames = config.getMetricNames();
+        metricTypes = config.getMetricTypes();
+        numMetrics = metricNames.size();
+        valWritable = new BytesWritable();
+        keyWritable = new BytesWritable();
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+    }
+
+
+    @Override
+    public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
+        throws IOException, InterruptedException {
+
+      // input record
+      GenericRecord inputRecord = key.datum();
+
+      // read metrics
+      Number[] metricValues = new Number[numMetrics];
+      for (int i = 0; i < numMetrics; i++) {
+        String metricName = metricNames.get(i);
+        Number metricValue = ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName);
+        metricValues[i] = metricValue;
+      }
+      TopKPhaseMapOutputValue valWrapper = new TopKPhaseMapOutputValue(metricValues, metricTypes);
+      byte[] valBytes = valWrapper.toBytes();
+      valWritable.set(valBytes, 0, valBytes.length);
+
+      // read dimensions
+      for (int i = 0; i < dimensionNames.size(); i++) {
+        String dimensionName = dimensionNames.get(i);
+        DimensionType dimensionType = dimensionTypes.get(i);
+        Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
+
+        TopKPhaseMapOutputKey keyWrapper = new TopKPhaseMapOutputKey(dimensionName, dimensionValue, dimensionType);
+        byte[] keyBytes = keyWrapper.toBytes();
+        keyWritable.set(keyBytes, 0, keyBytes.length);
+        context.write(keyWritable, valWritable);
+      }
+      TopKPhaseMapOutputKey allKeyWrapper = new TopKPhaseMapOutputKey(TOPK_ALL_DIMENSION_NAME, TOPK_ALL_DIMENSION_VALUE, DimensionType.STRING);
+      byte[] allKeyBytes = allKeyWrapper.toBytes();
+      keyWritable.set(allKeyBytes, 0, allKeyBytes.length);
+      context.write(keyWritable, valWritable);
+    }
+
+    @Override
+    public void cleanup(Context context) throws IOException, InterruptedException {
+
+    }
+  }
+
+  public static class TopKPhaseCombiner
+    extends Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {
+
+    private TopKPhaseConfig config;
+    ThirdEyeConfig thirdeyeConfig;
+    private List<MetricType> metricTypes;
+    private int numMetrics;
+    BytesWritable keyWritable;
+    BytesWritable valWritable;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+      LOGGER.info("TopKPhaseJob.TopKPhaseCombiner.setup()");
+      Configuration configuration = context.getConfiguration();
+      try {
+        thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
+        config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
+        metricTypes = config.getMetricTypes();
+        numMetrics = metricTypes.size();
+        valWritable = new BytesWritable();
+        keyWritable = new BytesWritable();
+
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+    }
+
+    @Override
+    public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
+    throws IOException, InterruptedException {
+
+      Number[] aggMetricValues = new Number[numMetrics];
+      Arrays.fill(aggMetricValues, 0);
+
+      for (BytesWritable value : values) {
+        TopKPhaseMapOutputValue valWrapper = TopKPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes);
+        Number[] metricValues = valWrapper.getMetricValues();
+        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
+      }
+
+      TopKPhaseMapOutputValue valWrapper = new TopKPhaseMapOutputValue(aggMetricValues, metricTypes);
+      byte[] valBytes = valWrapper.toBytes();
+      valWritable.set(valBytes, 0, valBytes.length);
+
+      context.write(key, valWritable);
+    }
+  }
+
+  public static class TopKPhaseReducer
+      extends Reducer<BytesWritable, BytesWritable, NullWritable, NullWritable> {
+
+    private FileSystem fileSystem;
+    private Configuration configuration;
+
+    private ThirdEyeConfig thirdeyeConfig;
+    private TopKPhaseConfig config;
+    private List<String> dimensionNames;
+    private List<String> metricNames;
+    private List<MetricType> metricTypes;
+    private Map<String, Integer> metricToIndexMapping;
+    private int numMetrics;
+    BytesWritable keyWritable;
+    BytesWritable valWritable;
+    Number[] metricSums;
+    private Map<String, Map<Object, Number[]>> dimensionNameToValuesMap;
+    private TopKDimensionValues topkDimensionValues;
+    private Map<String, Double> metricThresholds;
+    private Map<String, Integer> thresholdPassCount;
+    private Map<String, TopKDimensionToMetricsSpec> topKDimensionToMetricsSpecMap;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+
+      LOGGER.info("TopKPhaseJob.TopKPhaseReducer.setup()");
+
+      configuration = context.getConfiguration();
+      fileSystem = FileSystem.get(configuration);
+      try {
+        thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(TOPK_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
+        config = TopKPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
+        LOGGER.info("Metric Thresholds form config {}", config.getMetricThresholds());
+        metricThresholds = config.getMetricThresholds();
+        topKDimensionToMetricsSpecMap = config.getTopKDimensionToMetricsSpec();
+        dimensionNames = config.getDimensionNames();
+        metricNames = config.getMetricNames();
+        metricTypes = config.getMetricTypes();
+
+        numMetrics = metricNames.size();
+
+        metricToIndexMapping = new HashMap<>();
+        for (int i = 0; i < numMetrics; i ++) {
+          metricToIndexMapping.put(metricNames.get(i), i);
+        }
+
+        dimensionNameToValuesMap = new HashMap<>();
+        thresholdPassCount = new HashMap<>();
+        for (String dimension : dimensionNames) {
+          dimensionNameToValuesMap.put(dimension, new HashMap<Object, Number[]>());
+          thresholdPassCount.put(dimension, 0);
+        }
+        topkDimensionValues = new TopKDimensionValues();
+
+        keyWritable = new BytesWritable();
+        valWritable = new BytesWritable();
+
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+    }
+
+    @Override
+    public void reduce(BytesWritable key, Iterable<BytesWritable> values,
+        Context context) throws IOException, InterruptedException {
+
+      TopKPhaseMapOutputKey keyWrapper = TopKPhaseMapOutputKey.fromBytes(key.getBytes());
+      String dimensionName = keyWrapper.getDimensionName();
+      Object dimensionValue = keyWrapper.getDimensionValue();
+
+      // Get aggregate metric values for dimension name value pair
+      Number[] aggMetricValues = new Number[numMetrics];
+      Arrays.fill(aggMetricValues, 0);
+      for (BytesWritable value : values) {
+        TopKPhaseMapOutputValue valWrapper = TopKPhaseMapOutputValue.fromBytes(value.getBytes(), metricTypes);
+        Number[] metricValues = valWrapper.getMetricValues();
+        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
+      }
+
+      // Metric sums case
+      if (dimensionName.equals(TOPK_ALL_DIMENSION_NAME) && dimensionValue.equals(TOPK_ALL_DIMENSION_VALUE)) {
+        LOGGER.info("Setting metric sums");
+        metricSums = Arrays.copyOf(aggMetricValues, numMetrics);
+        return;
+      }
+
+      // Check metric percentage threshold
+      if (MapUtils.isNotEmpty(metricThresholds)) {
+        boolean isPassThreshold = false;
+        for (int i = 0; i < numMetrics; i++) {
+          String metric = metricNames.get(i);
+          double metricValue = aggMetricValues[i].doubleValue();
+          double metricSum = metricSums[i].doubleValue();
+          double metricThresholdPercentage = metricThresholds.get(metric);
+          if (metricValue >= (metricSum * metricThresholdPercentage / 100)) {
+            isPassThreshold = true;
+            thresholdPassCount.put(dimensionName, thresholdPassCount.get(dimensionName) + 1);
+            break;
+          }
+        }
+        if (!isPassThreshold) {
+          return;
+        }
+        dimensionNameToValuesMap.get(dimensionName).put(dimensionValue, aggMetricValues);
+      }
+    }
+
+    @Override
+    protected void cleanup(Context context) throws IOException, InterruptedException {
+
+      for (String dimension : dimensionNames) {
+
+        LOGGER.info("{} records passed metric threshold for dimension {}", thresholdPassCount.get(dimension), dimension);
+
+        // Get top k
+        TopKDimensionToMetricsSpec topkSpec = topKDimensionToMetricsSpecMap.get(dimension);
+        if (topkSpec != null && topkSpec.getDimensionName() != null && topkSpec.getTopk() != null) {
+
+          // Get top k for each metric specified
+          Map<String, Integer> topkMetricsMap = topkSpec.getTopk();
+          for (Entry<String, Integer> topKEntry : topkMetricsMap.entrySet()) {
+
+            String metric = topKEntry.getKey();
+            int k = topKEntry.getValue();
+            MinMaxPriorityQueue<DimensionValueMetricPair> topKQueue = MinMaxPriorityQueue.maximumSize(k).create();
+
+            Map<Object, Number[]> dimensionToMetricsMap = dimensionNameToValuesMap.get(dimension);
+            for (Entry<Object, Number[]> entry : dimensionToMetricsMap.entrySet()) {
+              topKQueue.add(new DimensionValueMetricPair(entry.getKey(), entry.getValue()[metricToIndexMapping.get(metric)]));
+            }
+            LOGGER.info("Picking Top {} values for {} based on Metric {} : {}", k, dimension, metric, topKQueue);
+            for (DimensionValueMetricPair pair : topKQueue) {
+              topkDimensionValues.addValue(dimension, String.valueOf(pair.getDimensionValue()));
+            }
+          }
+        }
+      }
+
+      if (topkDimensionValues.getTopKDimensions().size() > 0) {
+        String topkValuesPath = configuration.get(TOPK_PHASE_OUTPUT_PATH.toString());
+        LOGGER.info("Writing top k values to {}",topkValuesPath);
+        FSDataOutputStream topKDimensionValuesOutputStream = fileSystem.create(
+            new Path(topkValuesPath + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE));
+        OBJECT_MAPPER.writeValue((DataOutput) topKDimensionValuesOutputStream, topkDimensionValues);
+        topKDimensionValuesOutputStream.close();
+      }
+    }
+  }
+
+  public Job run() throws Exception {
+    Job job = Job.getInstance(getConf());
+    job.setJobName(name);
+    job.setJarByClass(TopKPhaseJob.class);
+
+    Configuration configuration = job.getConfiguration();
+    FileSystem fs = FileSystem.get(configuration);
+
+    // Properties
+    LOGGER.info("Properties {}", props);
+
+    // Input Path
+    String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
+    LOGGER.info("Input path dir: " + inputPathDir);
+    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
+      LOGGER.info("Adding input:" + inputPath);
+      Path input = new Path(inputPath);
+      FileInputFormat.addInputPath(job, input);
+    }
+
+    // Output path
+    Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
+    LOGGER.info("Output path dir: " + outputPath.toString());
+    if (fs.exists(outputPath)) {
+      fs.delete(outputPath, true);
+    }
+    FileOutputFormat.setOutputPath(job, outputPath);
+
+    // Schema
+    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
+    LOGGER.info("Schema : {}", avroSchema.toString(true));
+
+    // ThirdEyeConfig
+    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty(
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString()), avroSchema);
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), dimensionTypesProperty);
+    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
+        props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
+    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
+    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
+    job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(), OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
+
+    // Map config
+    job.setMapperClass(TopKPhaseMapper.class);
+    job.setInputFormatClass(AvroKeyInputFormat.class);
+    job.setMapOutputKeyClass(BytesWritable.class);
+    job.setMapOutputValueClass(BytesWritable.class);
+
+    // Combiner
+    job.setCombinerClass(TopKPhaseCombiner.class);
+
+    // Reduce config
+    job.setReducerClass(TopKPhaseReducer.class);
+    job.setOutputKeyClass(NullWritable.class);
+    job.setOutputValueClass(NullWritable.class);
+    job.setNumReduceTasks(1);
+
+    job.waitForCompletion(true);
+
+    return job;
+  }
+
+
+  private String getAndSetConfiguration(Configuration configuration,
+      TopKPhaseConstants constant) {
+    String value = getAndCheck(constant.toString());
+    configuration.set(constant.toString(), value);
+    return value;
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+}
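For orientation, the reducer cleanup above keeps the top k dimension values per metric with a size-bounded Guava MinMaxPriorityQueue. The following standalone sketch illustrates just that queue behaviour on toy numbers, using an explicit reversed comparator in place of DimensionValueMetricPair's natural ordering (that class is not part of this file):

    package org.apache.pinot.thirdeye.hadoop.topk;

    import java.util.Arrays;
    import java.util.Comparator;

    import com.google.common.collect.MinMaxPriorityQueue;

    public class TopKQueueSketch {
      public static void main(String[] args) {
        int k = 2;
        // With a reversed comparator the numerically smallest element is the
        // "greatest" one, so the bounded queue evicts it first and only the
        // k largest aggregated metric values survive.
        MinMaxPriorityQueue<Long> topK = MinMaxPriorityQueue
            .orderedBy(Comparator.<Long>reverseOrder())
            .maximumSize(k)
            .create();
        topK.addAll(Arrays.asList(500L, 300L, 120L, 80L, 40L));
        System.out.println(topK); // keeps 500 and 300 (iteration order unspecified)
      }
    }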
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java
new file mode 100644
index 0000000000..024549c7ab
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseMapOutputKey.java
@@ -0,0 +1,112 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.topk;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+
+/**
+ * Wrapper for the key generated by mapper in TopKPhase
+ */
+public class TopKPhaseMapOutputKey {
+
+  String dimensionName;
+  Object dimensionValue;
+  DimensionType dimensionType;
+
+  public TopKPhaseMapOutputKey(String dimensionName, Object dimensionValue, DimensionType dimensionType) {
+    this.dimensionName = dimensionName;
+    this.dimensionValue = dimensionValue;
+    this.dimensionType = dimensionType;
+  }
+
+  public String getDimensionName() {
+    return dimensionName;
+  }
+
+  public Object getDimensionValue() {
+    return dimensionValue;
+  }
+
+  public DimensionType getDimensionType() {
+    return dimensionType;
+  }
+
+  /**
+   * Serializes this TopKPhaseMapOutputKey to a byte array
+   * @return serialized bytes
+   * @throws IOException
+   */
+  public byte[] toBytes() throws IOException {
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    byte[] bytes;
+    // dimension name
+    bytes = dimensionName.getBytes();
+    dos.writeInt(bytes.length);
+    dos.write(bytes);
+
+    // dimension type
+    bytes = dimensionType.toString().getBytes();
+    dos.writeInt(bytes.length);
+    dos.write(bytes);
+
+    // dimension value
+    DimensionType.writeDimensionValueToOutputStream(dos, dimensionValue, dimensionType);
+    baos.close();
+    dos.close();
+    return baos.toByteArray();
+  }
+
+  /**
+   * Constructs a TopKPhaseMapOutputKey from a bytes buffer
+   * @param buffer serialized bytes produced by toBytes()
+   * @return deserialized key wrapper
+   * @throws IOException
+   */
+  public static TopKPhaseMapOutputKey fromBytes(byte[] buffer) throws IOException {
+    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
+    int length;
+    byte[] bytes;
+
+    // dimension name
+    length = dis.readInt();
+    bytes = new byte[length];
+    dis.readFully(bytes);
+    String dimensionName = new String(bytes);
+
+    // dimension type
+    length = dis.readInt();
+    bytes = new byte[length];
+    dis.readFully(bytes);
+    String dimensionTypeString = new String(bytes);
+    DimensionType dimensionType = DimensionType.valueOf(dimensionTypeString);
+
+    // dimension value
+    Object dimensionValue = DimensionType.readDimensionValueFromDataInputStream(dis, dimensionType);
+
+    TopKPhaseMapOutputKey wrapper;
+    wrapper = new TopKPhaseMapOutputKey(dimensionName, dimensionValue, dimensionType);
+    return wrapper;
+  }
+
+}
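A possible round trip through the serialization format above (length-prefixed dimension name and type, followed by the typed value). Class and constant names come from this patch; the dimension name and value are made up:

    package org.apache.pinot.thirdeye.hadoop.topk;

    import org.apache.pinot.thirdeye.hadoop.config.DimensionType;

    public class MapOutputKeyRoundTrip {
      public static void main(String[] args) throws Exception {
        TopKPhaseMapOutputKey key =
            new TopKPhaseMapOutputKey("country", "us", DimensionType.STRING);
        byte[] bytes = key.toBytes();

        // decode and read the fields back
        TopKPhaseMapOutputKey decoded = TopKPhaseMapOutputKey.fromBytes(bytes);
        System.out.println(decoded.getDimensionName() + "=" + decoded.getDimensionValue());
      }
    }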
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java
new file mode 100644
index 0000000000..7c70cfca76
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/topk/TopKPhaseMapOutputValue.java
@@ -0,0 +1,93 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.topk;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+
+/**
+ * Wrapper for value generated by mapper in TopKPhase
+ */
+public class TopKPhaseMapOutputValue {
+
+  Number[] metricValues;
+  List<MetricType> metricTypes;
+
+  public TopKPhaseMapOutputValue(Number[] metricValues, List<MetricType> metricTypes) {
+    this.metricValues = metricValues;
+    this.metricTypes = metricTypes;
+  }
+
+  public Number[] getMetricValues() {
+    return metricValues;
+  }
+
+  /**
+   * Serializes this TopKPhaseMapOutputValue to a byte array
+   * @return serialized bytes
+   * @throws IOException
+   */
+  public byte[] toBytes() throws IOException {
+
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+
+    // metric values
+    dos.writeInt(metricValues.length);
+    for (int i = 0; i < metricValues.length; i++) {
+      Number number = metricValues[i];
+      MetricType metricType = metricTypes.get(i);
+      MetricType.writeMetricValueToDataOutputStream(dos, number, metricType);
+    }
+
+    baos.close();
+    dos.close();
+    return baos.toByteArray();
+  }
+
+  /**
+   * Constructs a TopKPhaseMapOutputValue from a bytes buffer
+   * @param buffer serialized bytes produced by toBytes()
+   * @param metricTypes metric types used when the buffer was written
+   * @return deserialized value wrapper
+   * @throws IOException
+   */
+  public static TopKPhaseMapOutputValue fromBytes(byte[] buffer, List<MetricType> metricTypes) throws IOException {
+    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buffer));
+    int length;
+
+    // metric values
+    length = dis.readInt();
+    Number[] metricValues = new Number[length];
+
+    for (int i = 0 ; i < length; i++) {
+      MetricType metricType = metricTypes.get(i);
+      Number metricValue = MetricType.readMetricValueFromDataInputStream(dis, metricType);
+      metricValues[i] = metricValue;
+    }
+
+    TopKPhaseMapOutputValue wrapper;
+    wrapper = new TopKPhaseMapOutputValue(metricValues, metricTypes);
+    return wrapper;
+  }
+
+}
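The value wrapper can be exercised the same way. Unlike the key, the byte form carries no type information, so decoding needs the same metricTypes list that was used for encoding; the values below are made up:

    package org.apache.pinot.thirdeye.hadoop.topk;

    import java.util.Arrays;
    import java.util.List;

    import org.apache.pinot.thirdeye.hadoop.config.MetricType;

    public class MapOutputValueRoundTrip {
      public static void main(String[] args) throws Exception {
        List<MetricType> metricTypes = Arrays.asList(MetricType.LONG, MetricType.DOUBLE);
        TopKPhaseMapOutputValue value =
            new TopKPhaseMapOutputValue(new Number[] {42L, 0.5d}, metricTypes);
        byte[] bytes = value.toBytes();

        // the decoder walks the buffer using the metric types, in order
        Number[] decoded = TopKPhaseMapOutputValue.fromBytes(bytes, metricTypes).getMetricValues();
        System.out.println(Arrays.toString(decoded)); // [42, 0.5]
      }
    }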
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java
new file mode 100644
index 0000000000..eda9993684
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DefaultTransformConfigUDF.java
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.transform;
+
+import org.apache.hadoop.mapreduce.Job;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DefaultTransformConfigUDF implements TransformConfigUDF {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultTransformConfigUDF.class);
+
+  @Override
+  public void setTransformConfig(Job job) {
+    // default implementation adds no custom job configuration
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DefaultTransformUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DefaultTransformUDF.java
new file mode 100644
index 0000000000..5fa23cc6a5
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DefaultTransformUDF.java
@@ -0,0 +1,44 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.transform;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DefaultTransformUDF implements TransformUDF {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultTransformUDF.class);
+
+  private Schema outputSchema;
+
+  public DefaultTransformUDF() {
+
+  }
+
+  @Override
+  public void init(Schema outputSchema) {
+    this.outputSchema = outputSchema;
+  }
+
+  @Override
+  public GenericRecord transformRecord(String sourceName, GenericRecord record) {
+    // Default implementation returns input record as is
+    return record;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java
new file mode 100644
index 0000000000..bc3dd15b66
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/DelegatingAvroKeyInputFormat.java
@@ -0,0 +1,77 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.transform;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.avro.mapreduce.AvroKeyRecordReader;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.map.JsonMappingException;
+import org.codehaus.jackson.map.ObjectMapper;
+
+import org.codehaus.jackson.type.TypeReference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DelegatingAvroKeyInputFormat<T> extends AvroKeyInputFormat<T> {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DelegatingAvroKeyInputFormat.class);
+  private static final TypeReference<Map<String, String>> MAP_STRING_STRING_TYPE =
+      new TypeReference<Map<String, String>>() {
+      };
+
+  public org.apache.hadoop.mapreduce.RecordReader<org.apache.avro.mapred.AvroKey<T>, NullWritable> createRecordReader(
+      InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
+    LOGGER.info("DelegatingAvroKeyInputFormat.createRecordReader()  for split:{}", split);
+    FileSplit fileSplit = (FileSplit) split;
+    Configuration configuration = context.getConfiguration();
+    String sourceName = getSourceNameFromPath(fileSplit, configuration);
+    LOGGER.info("Source Name for path {} : {}", fileSplit.getPath(), sourceName);
+    Map<String, String> schemaJSONMapping = new ObjectMapper()
+        .readValue(configuration.get("schema.json.mapping"), MAP_STRING_STRING_TYPE);
+
+    LOGGER.info("Schema JSON Mapping: {}", schemaJSONMapping);
+
+    String sourceSchemaJSON = schemaJSONMapping.get(sourceName);
+
+    Schema schema = new Schema.Parser().parse(sourceSchemaJSON);
+    return new AvroKeyRecordReader<T>(schema);
+  }
+
+  public static String getSourceNameFromPath(FileSplit fileSplit, Configuration configuration)
+      throws IOException, JsonParseException, JsonMappingException {
+    String content = configuration.get("schema.path.mapping");
+    Map<String, String> schemaPathMapping =
+        new ObjectMapper().readValue(content, MAP_STRING_STRING_TYPE);
+    LOGGER.info("Schema Path Mapping: {}", schemaPathMapping);
+
+    String sourceName = null;
+    for (String path : schemaPathMapping.keySet()) {
+      if (fileSplit.getPath().toString().indexOf(path) > -1) {
+        sourceName = schemaPathMapping.get(path);
+        break;
+      }
+    }
+    return sourceName;
+  }
+}
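For context, the two configuration entries read above are JSON-serialized string maps that TransformPhaseJob (later in this patch) writes into the job configuration. A rough sketch of populating them by hand; the HDFS path and schema JSON are illustrative only:

    package org.apache.pinot.thirdeye.hadoop.transform;

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;

    import com.fasterxml.jackson.databind.ObjectMapper;

    public class DelegatingInputFormatConfigSketch {
      public static void main(String[] args) throws Exception {
        // input path (or path prefix) -> logical source name
        Map<String, String> pathToSource = new HashMap<>();
        pathToSource.put("hdfs://namenode/data/pageviews/2018-12-01", "pageviews");

        // logical source name -> avro schema JSON for that source
        Map<String, String> sourceToSchemaJson = new HashMap<>();
        sourceToSchemaJson.put("pageviews",
            "{\"type\":\"record\",\"name\":\"PageView\",\"fields\":["
                + "{\"name\":\"country\",\"type\":\"string\"},"
                + "{\"name\":\"clicks\",\"type\":\"long\"}]}");

        ObjectMapper mapper = new ObjectMapper();
        Configuration conf = new Configuration();
        conf.set("schema.path.mapping", mapper.writeValueAsString(pathToSource));
        conf.set("schema.json.mapping", mapper.writeValueAsString(sourceToSchemaJson));
        // createRecordReader() resolves the source name by substring match on the
        // split path, then parses the schema JSON registered for that source.
      }
    }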
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformConfigUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformConfigUDF.java
new file mode 100644
index 0000000000..914c25d9a2
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformConfigUDF.java
@@ -0,0 +1,31 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.transform;
+
+import org.apache.hadoop.mapreduce.Job;
+
+/**
+ * Simple interface to set custom job configuration (e.g. distributed cache files)
+ * for the transform phase
+ */
+public interface TransformConfigUDF {
+
+  /**
+   * @param job transform phase job to configure
+   */
+  void setTransformConfig(Job job);
+}
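A hypothetical implementation of this interface could, for example, ship a lookup file to the mappers through the distributed cache before the job is submitted. The file path is made up, and the sketch assumes the standard Hadoop Job.addCacheFile API:

    package org.apache.pinot.thirdeye.hadoop.transform;

    import java.net.URI;

    import org.apache.hadoop.mapreduce.Job;

    public class LookupCacheTransformConfigUDF implements TransformConfigUDF {

      @Override
      public void setTransformConfig(Job job) {
        // make a side file available to every mapper; any other job-level
        // setting could be applied here as well
        job.addCacheFile(URI.create("hdfs:///data/lookups/country_codes.tsv"));
      }
    }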
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformPhaseJob.java
new file mode 100644
index 0000000000..1dd842b61d
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformPhaseJob.java
@@ -0,0 +1,289 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.transform;
+
+import static org.apache.pinot.thirdeye.hadoop.transform.TransformPhaseJobConstants.*;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.lang.reflect.Constructor;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.avro.mapreduce.AvroKeyOutputFormat;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Transform job to transform input files from one schema to another
+ * Required properties:
+ * transform.source.names=<comma separated list of source names>
+ * <source>.transform.input.schema=<path to input schema on hdfs, per source>
+ * <source>.transform.input.path=<path to input data files on hdfs, per source>
+ * transform.output.schema=<path to output schema on hdfs>
+ * transform.output.path=<output data path on hdfs>
+ * transform.udf.class=<UDF class to perform the record transformation>
+ * transform.config.udf.class=<UDF class to set custom job configuration>
+ * transform.num.reducers=<number of reducers>
+ */
+public class TransformPhaseJob extends Configured {
+  private static final Logger LOGGER = LoggerFactory.getLogger(TransformPhaseJob.class);
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  private String name;
+  private Properties props;
+
+  public TransformPhaseJob(String name, Properties props) {
+    super(new Configuration());
+    this.name = name;
+    this.props = props;
+  }
+
+  public static class GenericTransformMapper
+      extends Mapper<AvroKey<GenericRecord>, NullWritable, IntWritable, AvroValue<GenericRecord>> {
+
+    TransformUDF transformUDF;
+    int numReducers;
+    int reducerKey;
+    String sourceName;
+
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+
+      LOGGER.info("GenericAvroTransformJob.GenericTransformMapper.setup()");
+
+      Configuration configuration = context.getConfiguration();
+      FileSystem fs = FileSystem.get(configuration);
+
+      FileSplit fileSplit = (FileSplit) context.getInputSplit();
+      LOGGER.info("split name:" + fileSplit.toString());
+      sourceName = DelegatingAvroKeyInputFormat.getSourceNameFromPath(fileSplit, configuration);
+      LOGGER.info("Input: {} belongs to Source:{}", fileSplit, sourceName);
+
+      String numTransformReducers = configuration.get(TRANSFORM_NUM_REDUCERS.toString());
+      numReducers = Integer.parseInt(numTransformReducers);
+      reducerKey = 1;
+      try {
+
+        String transformUDFClass = configuration.get(TRANSFORM_UDF.toString());
+        LOGGER.info("Initializing TransformUDFClass:{} with params:{}", transformUDFClass);
+        Constructor<?> constructor = Class.forName(transformUDFClass).getConstructor();
+        transformUDF = (TransformUDF) constructor.newInstance();
+
+        String outputSchemaPath = configuration.get(TRANSFORM_OUTPUT_SCHEMA.toString());
+        Schema.Parser parser = new Schema.Parser();
+        Schema outputSchema = parser.parse(fs.open(new Path(outputSchemaPath)));
+
+        transformUDF.init(outputSchema);
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+
+    }
+
+    @Override
+    public void map(AvroKey<GenericRecord> recordWrapper, NullWritable value, Context context)
+        throws IOException, InterruptedException {
+      GenericRecord record = recordWrapper.datum();
+      GenericRecord outputRecord = transformUDF.transformRecord(sourceName, record);
+
+      if (outputRecord != null) {
+
+        IntWritable key = new IntWritable(reducerKey);
+        reducerKey = (reducerKey == numReducers) ? (1) : (reducerKey + 1);
+        context.write(key, new AvroValue<GenericRecord>(outputRecord));
+      }
+
+    }
+
+    @Override
+    public void cleanup(Context context) throws IOException, InterruptedException {
+
+    }
+
+  }
+
+  public static class GenericTransformReducer
+      extends Reducer<IntWritable, AvroValue<GenericRecord>, AvroKey<GenericRecord>, NullWritable> {
+    @Override
+    public void reduce(IntWritable key, Iterable<AvroValue<GenericRecord>> values, Context context)
+        throws IOException, InterruptedException {
+      for (AvroValue<GenericRecord> value : values) {
+        GenericRecord record = value.datum();
+        context.write(new AvroKey<GenericRecord>(record), NullWritable.get());
+      }
+    }
+  }
+
+  public Job run() throws Exception {
+
+    // Set job config
+    Job job = Job.getInstance(getConf());
+    Configuration configuration = job.getConfiguration();
+    job.setJobName(name);
+    job.setJarByClass(TransformPhaseJob.class);
+
+    // Set custom config like adding distributed caches
+    String transformConfigUDFClass = getAndSetConfiguration(configuration, TRANSFORM_CONFIG_UDF);
+    LOGGER.info("Initializing TransformConfigUDFClass:{} with params:{}", transformConfigUDFClass);
+    Constructor<?> constructor = Class.forName(transformConfigUDFClass).getConstructor();
+    TransformConfigUDF transformConfigUDF = (TransformConfigUDF) constructor.newInstance();
+    transformConfigUDF.setTransformConfig(job);
+
+    FileSystem fs = FileSystem.get(configuration);
+
+    // Set outputSchema, output path
+    String outputSchemaPath = getAndSetConfiguration(configuration, TRANSFORM_OUTPUT_SCHEMA);
+    Schema.Parser parser = new Schema.Parser();
+    Schema outputSchema = parser.parse(fs.open(new Path(outputSchemaPath)));
+    LOGGER.info("{}", outputSchema);
+
+    String outputPathDir = getAndSetConfiguration(configuration, TRANSFORM_OUTPUT_PATH);
+    Path outputPath = new Path(outputPathDir);
+    if (fs.exists(outputPath)) {
+      fs.delete(outputPath, true);
+    }
+    FileOutputFormat.setOutputPath(job, new Path(outputPathDir));
+
+    // Set input schema, input path for every source
+    String sources = getAndSetConfiguration(configuration, TRANSFORM_SOURCE_NAMES);
+    List<String> sourceNames = Arrays.asList(sources.split(","));
+    Map<String, String> schemaMap = new HashMap<String, String>();
+    Map<String, String> schemaPathMapping = new HashMap<String, String>();
+
+    for (String sourceName : sourceNames) {
+
+      // load schema for each source
+      LOGGER.info("Loading Schema for {}", sourceName);
+      FSDataInputStream schemaStream =
+          fs.open(new Path(getAndCheck(sourceName + "." + TRANSFORM_INPUT_SCHEMA.toString())));
+      Schema schema = new Schema.Parser().parse(schemaStream);
+      schemaMap.put(sourceName, schema.toString());
+      LOGGER.info("Schema for {}:  \n{}", sourceName, schema);
+
+      // configure input data for each source
+      String inputPathDir = getAndCheck(sourceName + "." + TRANSFORM_INPUT_PATH.toString());
+      LOGGER.info("Input path dir for " + sourceName + ": " + inputPathDir);
+      for (String inputPath : inputPathDir.split(",")) {
+        Path input = new Path(inputPath);
+        FileStatus[] listFiles = fs.listStatus(input);
+        boolean isNested = false;
+        for (FileStatus fileStatus : listFiles) {
+          if (fileStatus.isDirectory()) {
+            isNested = true;
+            Path path = fileStatus.getPath();
+            LOGGER.info("Adding input:" + path);
+            FileInputFormat.addInputPath(job, path);
+            schemaPathMapping.put(path.toString(), sourceName);
+          }
+        }
+        if (!isNested) {
+          LOGGER.info("Adding input:" + inputPath);
+          FileInputFormat.addInputPath(job, input);
+          schemaPathMapping.put(input.toString(), sourceName);
+        }
+      }
+    }
+    StringWriter temp = new StringWriter();
+    OBJECT_MAPPER.writeValue(temp, schemaPathMapping);
+    job.getConfiguration().set("schema.path.mapping", temp.toString());
+
+    temp = new StringWriter();
+    OBJECT_MAPPER.writeValue(temp, schemaMap);
+    job.getConfiguration().set("schema.json.mapping", temp.toString());
+
+    // set transform UDF class
+    getAndSetConfiguration(configuration, TRANSFORM_UDF);
+
+    // set reducers
+    String numReducers = getAndSetConfiguration(configuration, TRANSFORM_NUM_REDUCERS);
+    if (numReducers != null) {
+      job.setNumReduceTasks(Integer.parseInt(numReducers));
+    } else {
+      job.setNumReduceTasks(10);
+    }
+    LOGGER.info("Setting number of reducers : " + job.getNumReduceTasks());
+
+    // Map config
+    job.setMapperClass(GenericTransformMapper.class);
+    // AvroJob.setInputKeySchema(job, inputSchema);
+    job.setInputFormatClass(DelegatingAvroKeyInputFormat.class);
+    job.setMapOutputKeyClass(IntWritable.class);
+    job.setMapOutputValueClass(AvroValue.class);
+    AvroJob.setMapOutputValueSchema(job, outputSchema);
+
+    // Reducer config
+    job.setReducerClass(GenericTransformReducer.class);
+    job.setOutputKeyClass(AvroKey.class);
+    job.setOutputValueClass(NullWritable.class);
+    AvroJob.setOutputKeySchema(job, outputSchema);
+    job.setOutputFormatClass(AvroKeyOutputFormat.class);
+    job.waitForCompletion(true);
+
+    return job;
+  }
+
+  private String getAndSetConfiguration(Configuration configuration,
+      TransformPhaseJobConstants constant) {
+    String value = getAndCheck(constant.toString());
+    configuration.set(constant.toString(), value);
+    return value;
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("usage: config.properties");
+    }
+
+    Properties props = new Properties();
+    props.load(new FileInputStream(args[0]));
+
+    TransformPhaseJob job = new TransformPhaseJob("transform_phase_job", props);
+    job.run();
+  }
+
+}
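Putting the property keys above together, a launcher could look roughly like the sketch below. All paths are illustrative; the per-source prefix mirrors the getAndCheck calls in run(), and every key shown is required by the job:

    package org.apache.pinot.thirdeye.hadoop.transform;

    import java.util.Properties;

    public class TransformPhaseJobLauncherSketch {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // comma separated logical sources; input keys are prefixed per source
        props.setProperty("transform.source.names", "pageviews");
        props.setProperty("pageviews.transform.input.schema", "/schemas/pageviews.avsc");
        props.setProperty("pageviews.transform.input.path", "/data/pageviews/2018-12-01");
        props.setProperty("transform.output.schema", "/schemas/pageviews_transformed.avsc");
        props.setProperty("transform.output.path", "/data/pageviews_transformed/2018-12-01");
        props.setProperty("transform.udf.class", DefaultTransformUDF.class.getName());
        props.setProperty("transform.config.udf.class", DefaultTransformConfigUDF.class.getName());
        props.setProperty("transform.num.reducers", "10");

        new TransformPhaseJob("transform_phase_job", props).run();
      }
    }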
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformPhaseJobConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformPhaseJobConstants.java
new file mode 100644
index 0000000000..c3fc63229a
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformPhaseJobConstants.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.transform;
+
+public enum TransformPhaseJobConstants {
+  TRANSFORM_INPUT_SCHEMA("transform.input.schema"),
+  TRANSFORM_INPUT_PATH("transform.input.path"),
+  TRANSFORM_OUTPUT_PATH("transform.output.path"),
+  TRANSFORM_OUTPUT_SCHEMA("transform.output.schema"),
+  TRANSFORM_SOURCE_NAMES("transform.source.names"),
+  TRANSFORM_UDF("transform.udf.class"),
+  TRANSFORM_CONFIG_UDF("transform.config.udf.class"),
+  TRANSFORM_NUM_REDUCERS("transform.num.reducers");
+
+  String name;
+
+  TransformPhaseJobConstants(String name) {
+    this.name = name;
+  }
+
+  public String toString() {
+    return name;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformUDF.java
new file mode 100644
index 0000000000..2c3e12ecd1
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/transform/TransformUDF.java
@@ -0,0 +1,38 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.transform;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+
+/**
+ * Simple interface to transform a Generic Record
+ */
+public interface TransformUDF {
+
+  /**
+   * Initializes by providing the output schema.
+   * @param outputSchema
+   */
+  void init(Schema outputSchema);
+
+  /**
+   * @param sourceName name of the source the record belongs to
+   * @param record input record
+   * @return transformed record, or null to drop the record
+   */
+  GenericRecord transformRecord(String sourceName, GenericRecord record);
+}
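A hypothetical TransformUDF implementation to illustrate the contract; the field name is made up, and returning null drops the record because the mapper only writes non-null results:

    package org.apache.pinot.thirdeye.hadoop.transform;

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericRecord;

    public class CountryNormalizingTransformUDF implements TransformUDF {

      private Schema outputSchema;

      @Override
      public void init(Schema outputSchema) {
        this.outputSchema = outputSchema;
      }

      @Override
      public GenericRecord transformRecord(String sourceName, GenericRecord record) {
        Object country = record.get("country");
        if (country == null) {
          return null; // drop records without the dimension
        }
        record.put("country", country.toString().toLowerCase());
        return record;
      }
    }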
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtils.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtils.java
new file mode 100644
index 0000000000..d8184265fb
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtils.java
@@ -0,0 +1,58 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.util;
+
+import java.util.List;
+
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+
+/**
+ * Class to aggregate metric values
+ */
+public class ThirdeyeAggregateMetricUtils {
+
+  /**
+   * Aggregates an array of metricValues into an aggregate array
+   * @param metricTypes - metric types array
+   * @param aggMetricValues - aggregated metric values
+   * @param metricValues - metric values to add
+   */
+  public static void aggregate(List<MetricType> metricTypes, Number[] aggMetricValues, Number[] metricValues) {
+    int numMetrics = aggMetricValues.length;
+    for (int i = 0; i < numMetrics; i++) {
+      MetricType metricType = metricTypes.get(i);
+      switch (metricType) {
+        case SHORT:
+          aggMetricValues[i] = aggMetricValues[i].shortValue() + metricValues[i].shortValue();
+          break;
+        case INT:
+          aggMetricValues[i] = aggMetricValues[i].intValue() + metricValues[i].intValue();
+          break;
+        case FLOAT:
+          aggMetricValues[i] = aggMetricValues[i].floatValue() + metricValues[i].floatValue();
+          break;
+        case DOUBLE:
+          aggMetricValues[i] = aggMetricValues[i].doubleValue() + metricValues[i].doubleValue();
+          break;
+        case LONG:
+        default:
+          aggMetricValues[i] = aggMetricValues[i].longValue() + metricValues[i].longValue();
+          break;
+      }
+    }
+  }
+
+}
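A small usage sketch for the aggregation helper above, with made-up values; the positions in aggMetricValues and metricValues must line up with metricTypes:

    package org.apache.pinot.thirdeye.hadoop.util;

    import java.util.Arrays;
    import java.util.List;

    import org.apache.pinot.thirdeye.hadoop.config.MetricType;

    public class AggregateMetricUtilsSketch {
      public static void main(String[] args) {
        List<MetricType> metricTypes = Arrays.asList(MetricType.LONG, MetricType.DOUBLE);

        Number[] aggregated = new Number[] {0L, 0.0d};
        // accumulate two rows of metric values into the running totals
        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggregated, new Number[] {10L, 1.5d});
        ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggregated, new Number[] {5L, 2.5d});

        System.out.println(Arrays.toString(aggregated)); // [15, 4.0]
      }
    }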
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyeAvroUtils.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyeAvroUtils.java
new file mode 100644
index 0000000000..9e89dea3af
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyeAvroUtils.java
@@ -0,0 +1,267 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.util;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.SchemaBuilder;
+import org.apache.avro.SchemaBuilder.BaseFieldTypeBuilder;
+import org.apache.avro.SchemaBuilder.FieldAssembler;
+import org.apache.avro.SchemaBuilder.RecordBuilder;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.collections.Predicate;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+import org.apache.pinot.common.data.FieldSpec;
+import org.apache.pinot.common.data.FieldSpec.DataType;
+import org.apache.pinot.core.data.readers.AvroRecordReader;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionType;
+import org.apache.pinot.thirdeye.hadoop.config.MetricType;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+
+/**
+ * This class contains methods to extract avro schema, and get
+ * avro reader from the avro files
+ */
+public class ThirdeyeAvroUtils {
+
+  private static Logger LOGGER = LoggerFactory.getLogger(ThirdeyeAvroUtils.class);
+  /**
+   * Extracts the avro schema from an avro file
+   * @param avroFile path to the avro file
+   * @return avro schema of the file
+   * @throws FileNotFoundException
+   * @throws IOException
+   */
+  public static Schema extractSchemaFromAvro(Path avroFile) throws IOException {
+    DataFileStream<GenericRecord> dataStreamReader = getAvroReader(avroFile);
+    Schema avroSchema = dataStreamReader.getSchema();
+    dataStreamReader.close();
+    return avroSchema;
+  }
+
+  /**
+   * Constructs an avro schema from a pinot schema
+   * @param schema pinot schema
+   * @return equivalent avro schema
+   */
+  public static Schema constructAvroSchemaFromPinotSchema(org.apache.pinot.common.data.Schema schema) {
+    Schema avroSchema = null;
+
+    RecordBuilder<Schema> recordBuilder = SchemaBuilder.record("record");
+    FieldAssembler<Schema> fieldAssembler = recordBuilder.fields();
+
+    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
+      String fieldName = fieldSpec.getName();
+      DataType dataType = fieldSpec.getDataType();
+      BaseFieldTypeBuilder<Schema> baseFieldTypeBuilder = fieldAssembler.name(fieldName).type().nullable();
+      switch (dataType) {
+        case BOOLEAN:
+          fieldAssembler = baseFieldTypeBuilder.booleanType().noDefault();
+          break;
+        case DOUBLE:
+          fieldAssembler = baseFieldTypeBuilder.doubleType().noDefault();
+          break;
+        case FLOAT:
+          fieldAssembler = baseFieldTypeBuilder.floatType().noDefault();
+          break;
+        case INT:
+          fieldAssembler = baseFieldTypeBuilder.intType().noDefault();
+          break;
+        case LONG:
+          fieldAssembler = baseFieldTypeBuilder.longType().noDefault();
+          break;
+        case STRING:
+          fieldAssembler = baseFieldTypeBuilder.stringType().noDefault();
+          break;
+        default:
+          break;
+      }
+    }
+
+    avroSchema = fieldAssembler.endRecord();
+    LOGGER.info("Avro Schema {}", avroSchema.toString(true));
+
+    return avroSchema;
+  }
+
+  private static DataFileStream<GenericRecord> getAvroReader(Path avroFile) throws IOException {
+    FileSystem fs = FileSystem.get(new Configuration());
+    if(avroFile.getName().endsWith("gz")) {
+      return new DataFileStream<GenericRecord>(new GZIPInputStream(fs.open(avroFile)), new GenericDatumReader<GenericRecord>());
+    } else {
+      return new DataFileStream<GenericRecord>(fs.open(avroFile), new GenericDatumReader<GenericRecord>());
+    }
+  }
+
+  /**
+   * Extracts the datatype of a field from the avro schema, given the name of the field
+   * @param fieldname
+   * @param schema
+   * @return
+   */
+  public static String getDataTypeForField(String fieldname, Schema schema) {
+    Field field = schema.getField(fieldname);
+    if (field == null) {
+      throw new IllegalStateException("Field " + fieldname + " does not exist in schema");
+    }
+
+    final Schema.Type type = field.schema().getType();
+    if (type == Schema.Type.ARRAY) {
+      throw new RuntimeException("TODO: validate correctness after commit b19a0965044d3e3f4f1541cc4cd9ea60b96a4b99");
+    }
+
+    return DataType.valueOf(extractSchemaFromUnionIfNeeded(field.schema()).getType()).toString();
+  }
+
+  /**
+   * Helper removed from AvroRecordReader in b19a0965044d3e3f4f1541cc4cd9ea60b96a4b99
+   *
+   * @param fieldSchema
+   * @return
+   */
+  private static org.apache.avro.Schema extractSchemaFromUnionIfNeeded(org.apache.avro.Schema fieldSchema) {
+    if ((fieldSchema).getType() == Schema.Type.UNION) {
+      fieldSchema = ((org.apache.avro.Schema) CollectionUtils.find(fieldSchema.getTypes(), new Predicate() {
+        @Override
+        public boolean evaluate(Object object) {
+          return ((org.apache.avro.Schema) object).getType() != Schema.Type.NULL;
+        }
+      }));
+    }
+    return fieldSchema;
+  }
+
+  /**
+   * Finds the avro file in the input folder, and returns its avro schema
+   * @param inputPathDir
+   * @return
+   * @throws IOException
+   */
+  public static Schema getSchema(String inputPathDir) throws IOException  {
+    FileSystem fs = FileSystem.get(new Configuration());
+    Schema avroSchema = null;
+    for (String input : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
+      Path inputPath = new Path(input);
+      for (FileStatus fileStatus : fs.listStatus(inputPath)) {
+        if (fileStatus.isFile() && fileStatus.getPath().getName().endsWith(ThirdEyeConstants.AVRO_SUFFIX)) {
+          LOGGER.info("Extracting schema from {}", fileStatus.getPath());
+          avroSchema = extractSchemaFromAvro(fileStatus.getPath());
+          break;
+        }
+      }
+    }
+    return avroSchema;
+  }
+
+  /**
+   * Constructs dimensionTypes property string from the dimension names with the help of the avro schema
+   * @param dimensionNamesProperty
+   * @param avroSchema
+   * @return
+   */
+  public static String getDimensionTypesProperty(String dimensionNamesProperty, Schema avroSchema) {
+    List<String> dimensionTypesFromSchema = new ArrayList<>();
+    if (StringUtils.isNotBlank(dimensionNamesProperty)) {
+      List<String> dimensionNamesFromConfig = Lists.newArrayList(dimensionNamesProperty.split(ThirdEyeConstants.FIELD_SEPARATOR));
+      for (String dimensionName : dimensionNamesFromConfig) {
+        dimensionTypesFromSchema.add(ThirdeyeAvroUtils.getDataTypeForField(dimensionName, avroSchema));
+      }
+    }
+    return Joiner.on(ThirdEyeConstants.FIELD_SEPARATOR).join(dimensionTypesFromSchema);
+  }
+
+
+  /**
+   * Constructs the metricTypes property string from the metric names with the help of the avro schema
+   * @param metricNamesProperty comma separated metric names
+   * @param metricTypesProperty comma separated metric types from config (may be null)
+   * @param avroSchema avro schema used to look up field types
+   * @return comma separated metric types
+   */
+  public static String getMetricTypesProperty(String metricNamesProperty, String metricTypesProperty, Schema avroSchema) {
+    List<String> metricTypesFromSchema = new ArrayList<>();
+    List<String> metricNamesFromConfig = Lists.newArrayList(metricNamesProperty.split(ThirdEyeConstants.FIELD_SEPARATOR));
+    for (String metricName : metricNamesFromConfig) {
+      if (ThirdEyeConstants.AUTO_METRIC_COUNT.equals(metricName)) {
+        metricTypesFromSchema.add(DataType.LONG.toString());
+        continue;
+      }
+      metricTypesFromSchema.add(ThirdeyeAvroUtils.getDataTypeForField(metricName, avroSchema));
+    }
+    String validatedMetricTypesProperty = Joiner.on(ThirdEyeConstants.FIELD_SEPARATOR).join(metricTypesFromSchema);
+    if (metricTypesProperty != null) {
+      List<String> metricTypesFromConfig = Lists.newArrayList(metricTypesProperty.split(ThirdEyeConstants.FIELD_SEPARATOR));
+      if (metricTypesFromConfig.size() == metricTypesFromSchema.size()) {
+        for (int i = 0; i < metricNamesFromConfig.size(); i++) {
+          String metricName = metricNamesFromConfig.get(i);
+          String metricTypeFromConfig = metricTypesFromConfig.get(i);
+          String metricTypeFromSchema = metricTypesFromSchema.get(i);
+          if (!metricTypeFromConfig.equals(metricTypeFromSchema)) {
+            LOGGER.warn("MetricType {} defined in config for metric {}, does not match dataType {} from avro schema",
+                metricTypeFromConfig, metricName, metricTypeFromSchema);
+          }
+        }
+        validatedMetricTypesProperty = metricTypesProperty;
+      }
+    }
+    return validatedMetricTypesProperty;
+  }
+
+  public static Object getDimensionFromRecord(GenericRecord record, String dimensionName) {
+    Object dimensionValue = record.get(dimensionName);
+    if (dimensionValue == null) {
+      String dataType = getDataTypeForField(dimensionName, record.getSchema());
+      DimensionType dimensionType = DimensionType.valueOf(dataType);
+      dimensionValue = dimensionType.getDefaultNullvalue();
+    }
+    return dimensionValue;
+  }
+
+  public static Number getMetricFromRecord(GenericRecord record, String metricName) {
+    Number metricValue = (Number) record.get(metricName);
+    if (metricValue == null) {
+      metricValue = ThirdEyeConstants.EMPTY_NUMBER;
+    }
+    return metricValue;
+  }
+
+  public static Number getMetricFromRecord(GenericRecord record, String metricName, MetricType metricType) {
+    Number metricValue = (Number) record.get(metricName);
+    if (metricValue == null) {
+      metricValue = metricType.getDefaultNullValue();
+    }
+    return metricValue;
+  }
+
+
+}
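
For orientation only, here is a minimal, hypothetical usage sketch of the two
property helpers above. It is not part of the diff; the inline avro schema and
field names are made up, and a single name is passed so the field separator
does not come into play.

    import org.apache.avro.Schema;
    import org.apache.pinot.thirdeye.hadoop.util.ThirdeyeAvroUtils;

    public class AvroTypePropertiesSketch {
      public static void main(String[] args) {
        // Inline avro schema with one string dimension and one long metric
        String avsc = "{\"type\":\"record\",\"name\":\"Event\",\"fields\":["
            + "{\"name\":\"d1\",\"type\":\"string\"},"
            + "{\"name\":\"m1\",\"type\":\"long\"}]}";
        Schema avroSchema = new Schema.Parser().parse(avsc);

        // Dimension names -> dimension types resolved from the schema
        String dimensionTypes = ThirdeyeAvroUtils.getDimensionTypesProperty("d1", avroSchema);

        // Metric names -> metric types; passing null lets the schema-derived types win
        String metricTypes = ThirdeyeAvroUtils.getMetricTypesProperty("m1", null, avroSchema);

        System.out.println(dimensionTypes + " / " + metricTypes);
      }
    }
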
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyePinotSchemaUtils.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyePinotSchemaUtils.java
new file mode 100644
index 0000000000..e5ce493a2a
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/util/ThirdeyePinotSchemaUtils.java
@@ -0,0 +1,127 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.util;
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.pinot.common.data.DimensionFieldSpec;
+import org.apache.pinot.common.data.FieldSpec;
+import org.apache.pinot.common.data.FieldSpec.DataType;
+import org.apache.pinot.common.data.MetricFieldSpec;
+import org.apache.pinot.common.data.Schema;
+import org.apache.pinot.common.data.TimeFieldSpec;
+import org.apache.pinot.common.data.TimeGranularitySpec;
+import org.apache.pinot.thirdeye.hadoop.config.DimensionSpec;
+import org.apache.pinot.thirdeye.hadoop.config.MetricSpec;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConstants;
+
+/**
+ * This class contains the methods needed to transform
+ * a ThirdEyeConfig into a Pinot Schema
+ */
+public class ThirdeyePinotSchemaUtils {
+
+  private static Logger LOGGER = LoggerFactory.getLogger(ThirdeyePinotSchemaUtils.class);
+
+  /**
+   * Transforms the thirdeyeConfig to a pinot schema
+   * Adds the default __COUNT metric if not already present
+   * Adds additional columns for all dimensions which
+   * are either specified as topk or whitelist
+   * and hence have an additional transformed column
+   * @param thirdeyeConfig the ThirdEye config to convert
+   * @return the equivalent pinot schema
+   */
+  public static Schema createSchema(ThirdEyeConfig thirdeyeConfig) {
+    Schema schema = new Schema();
+
+    Set<String> transformDimensions = thirdeyeConfig.getTransformDimensions();
+    for (DimensionSpec dimensionSpec : thirdeyeConfig.getDimensions()) {
+      FieldSpec fieldSpec = new DimensionFieldSpec();
+      String dimensionName = dimensionSpec.getName();
+      fieldSpec.setName(dimensionName);
+      fieldSpec.setDataType(DataType.valueOf(dimensionSpec.getDimensionType().toString()));
+      fieldSpec.setSingleValueField(true);
+      schema.addField(fieldSpec);
+
+      if (transformDimensions.contains(dimensionName)) {
+        fieldSpec = new DimensionFieldSpec();
+        dimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
+        fieldSpec.setName(dimensionName);
+        fieldSpec.setDataType(DataType.STRING);
+        fieldSpec.setSingleValueField(true);
+        schema.addField(fieldSpec);
+      }
+    }
+    boolean countIncluded = false;
+    for (MetricSpec metricSpec : thirdeyeConfig.getMetrics()) {
+      FieldSpec fieldSpec = new MetricFieldSpec();
+      String metricName = metricSpec.getName();
+      if (metricName.equals(ThirdEyeConstants.AUTO_METRIC_COUNT)) {
+        countIncluded = true;
+      }
+      fieldSpec.setName(metricName);
+      fieldSpec.setDataType(DataType.valueOf(metricSpec.getType().toString()));
+      fieldSpec.setSingleValueField(true);
+      schema.addField(fieldSpec);
+    }
+    if (!countIncluded) {
+      FieldSpec fieldSpec = new MetricFieldSpec();
+      String metricName = ThirdEyeConstants.AUTO_METRIC_COUNT;
+      fieldSpec.setName(metricName);
+      fieldSpec.setDataType(DataType.LONG);
+      fieldSpec.setDefaultNullValue(1);
+      schema.addField(fieldSpec);
+    }
+    TimeGranularitySpec incoming =
+        new TimeGranularitySpec(DataType.LONG,
+            thirdeyeConfig.getTime().getTimeGranularity().getSize(),
+            thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
+            thirdeyeConfig.getTime().getTimeFormat(),
+            thirdeyeConfig.getTime().getColumnName());
+    TimeGranularitySpec outgoing =
+        new TimeGranularitySpec(DataType.LONG,
+            thirdeyeConfig.getTime().getTimeGranularity().getSize(),
+            thirdeyeConfig.getTime().getTimeGranularity().getUnit(),
+            thirdeyeConfig.getTime().getTimeFormat(),
+            thirdeyeConfig.getTime().getColumnName());
+
+    schema.addField(new TimeFieldSpec(incoming, outgoing));
+
+    schema.setSchemaName(thirdeyeConfig.getCollection());
+
+    return schema;
+  }
+
+  public static Schema createSchema(String configPath) throws IOException {
+    FileSystem fs = FileSystem.get(new Configuration());
+
+    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.decode(fs.open(new Path(configPath)));
+    LOGGER.info("{}", thirdeyeConfig);
+
+    return createSchema(thirdeyeConfig);
+  }
+
+
+}
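
For orientation only, a hypothetical sketch of driving createSchema from job
properties. It is not part of the diff; the property keys come from
ThirdEyeConfigProperties (as used elsewhere in this PR) and the values are
made up.

    import java.util.Properties;
    import org.apache.pinot.common.data.Schema;
    import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
    import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfigProperties;
    import org.apache.pinot.thirdeye.hadoop.util.ThirdeyePinotSchemaUtils;

    public class PinotSchemaSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2");
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG");
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1");
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "INT");
        props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");

        ThirdEyeConfig config = ThirdEyeConfig.fromProperties(props);

        // The resulting schema carries every dimension, every metric plus the
        // auto __COUNT metric, and a time field; its name is the collection name.
        Schema pinotSchema = ThirdeyePinotSchemaUtils.createSchema(config);
        System.out.println(pinotSchema.getSchemaName());
      }
    }
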
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/DefaultWaitUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/DefaultWaitUDF.java
new file mode 100644
index 0000000000..264f10eb68
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/DefaultWaitUDF.java
@@ -0,0 +1,44 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.wait;
+
+import java.util.Properties;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DefaultWaitUDF implements WaitUDF {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultWaitUDF.class);
+
+  private Properties inputConfig;
+
+  public DefaultWaitUDF() {
+
+  }
+
+  @Override
+  public void init(Properties inputConfig) {
+    this.inputConfig = inputConfig;
+  }
+
+  // The default implementation always reports the input as complete
+  @Override
+  public boolean checkCompleteness() {
+    return true;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitPhaseJob.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitPhaseJob.java
new file mode 100644
index 0000000000..6644c40aad
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitPhaseJob.java
@@ -0,0 +1,68 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.wait;
+
+import java.lang.reflect.Constructor;
+import java.util.Properties;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.pinot.thirdeye.hadoop.wait.WaitPhaseJobConstants.*;
+
+public class WaitPhaseJob {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(WaitPhaseJob.class);
+
+  private String name;
+  private Properties props;
+
+  public WaitPhaseJob(String name, Properties props) {
+    this.name = name;
+    this.props = props;
+  }
+
+  public void run() {
+
+    try {
+      String thirdeyeWaitClass = getAndCheck(WAIT_UDF_CLASS.toString());
+
+      if (thirdeyeWaitClass != null) {
+        LOGGER.info("Initializing class {}", thirdeyeWaitClass);
+        Constructor<?> constructor = Class.forName(thirdeyeWaitClass).getConstructor();
+        WaitUDF waitUdf = (WaitUDF) constructor.newInstance();
+        waitUdf.init(props);
+
+        boolean complete = waitUdf.checkCompleteness();
+        if (!complete) {
+          throw new RuntimeException("Input folder has not received all records");
+        }
+      }
+    } catch (Exception e) {
+      LOGGER.error("Exception in waiting for inputs", e);
+    }
+  }
+
+  private String getAndCheck(String propName) {
+    String propValue = props.getProperty(propName);
+    if (propValue == null) {
+      throw new IllegalArgumentException(propName + " is a required property");
+    }
+    return propValue;
+  }
+
+}
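
For orientation only, a hypothetical sketch of wiring the wait phase above to
the built-in DefaultWaitUDF. It is not part of the diff; the phase name string
is made up.

    import java.util.Properties;
    import org.apache.pinot.thirdeye.hadoop.wait.WaitPhaseJob;
    import org.apache.pinot.thirdeye.hadoop.wait.WaitPhaseJobConstants;

    public class WaitPhaseSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        // Point the wait phase at the built-in UDF, which always reports completeness
        props.setProperty(WaitPhaseJobConstants.WAIT_UDF_CLASS.toString(),
            "org.apache.pinot.thirdeye.hadoop.wait.DefaultWaitUDF");

        // run() instantiates the UDF reflectively and logs (rather than rethrows) failures
        new WaitPhaseJob("wait_phase", props).run();
      }
    }
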
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitPhaseJobConstants.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitPhaseJobConstants.java
new file mode 100644
index 0000000000..5a95685a23
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitPhaseJobConstants.java
@@ -0,0 +1,34 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.wait;
+
+public enum WaitPhaseJobConstants {
+  WAIT_UDF_CLASS("wait.udf.class"),
+  WAIT_POLL_TIMEOUT("wait.poll.timeout"),
+  WAIT_POLL_FREQUENCY("wait.poll.frequency");
+
+  String name;
+
+  WaitPhaseJobConstants(String name) {
+    this.name = name;
+  }
+
+  public String toString() {
+    return name;
+  }
+
+}
diff --git a/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitUDF.java b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitUDF.java
new file mode 100644
index 0000000000..6fc1ac6fe3
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/main/java/org/apache/pinot/thirdeye/hadoop/wait/WaitUDF.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.wait;
+
+import java.util.Properties;
+
+
+/**
+ * Simple interface to check completeness of input folder
+ */
+public interface WaitUDF {
+
+  /**
+   * Initializes the UDF with the job's input configs.
+   * @param inputConfig properties carrying the job configuration
+   */
+  void init(Properties inputConfig);
+
+  /**
+   * Checks whether the input is complete.
+   * @return completeness status
+   * @throws Exception if completeness cannot be determined
+   */
+   boolean checkCompleteness() throws Exception;
+}
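
For orientation only, a hypothetical custom WaitUDF that treats the input as
complete once a _SUCCESS marker exists. It is not part of the diff; the
"wait.input.path" property name is an illustrative assumption, not defined by
this PR.

    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.pinot.thirdeye.hadoop.wait.WaitUDF;

    public class SuccessMarkerWaitUDF implements WaitUDF {
      private Properties inputConfig;

      @Override
      public void init(Properties inputConfig) {
        this.inputConfig = inputConfig;
      }

      @Override
      public boolean checkCompleteness() throws Exception {
        // "wait.input.path" is an illustrative property name supplied by the caller
        Path marker = new Path(inputConfig.getProperty("wait.input.path"), "_SUCCESS");
        return FileSystem.get(new Configuration()).exists(marker);
      }
    }
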
diff --git a/thirdeye/thirdeye-hadoop/src/main/resources/sample-join-config.yml b/thirdeye/thirdeye-hadoop/src/main/resources/sample-join-config.yml
index a06bb3e436..467d5044bb 100644
--- a/thirdeye/thirdeye-hadoop/src/main/resources/sample-join-config.yml
+++ b/thirdeye/thirdeye-hadoop/src/main/resources/sample-join-config.yml
@@ -1,9 +1,9 @@
 join.output.schema=/hdfs/path/to/output/schema/schema.avsc
 join.output.path=/hdfs/path/to/output
 
-join.config.udf.class=com.linkedin.thirdeye.hadoop.join.DefaultJoinConfigUDF
-join.key.extractor.class=com.linkedin.thirdeye.hadoop.join.DefaultJoinKeyExtractor
-join.udf.class=com.linkedin.thirdeye.hadoop.join.GenericJoinUDF
+join.config.udf.class=org.apache.pinot.thirdeye.hadoop.join.DefaultJoinConfigUDF
+join.key.extractor.class=org.apache.pinot.thirdeye.hadoop.join.DefaultJoinKeyExtractor
+join.udf.class=org.apache.pinot.thirdeye.hadoop.join.GenericJoinUDF
 
 join.source.names=source1,source2
 source1.join.input.schema=/hdfs/path/to/source1/schema/schema.avsc
diff --git a/thirdeye/thirdeye-hadoop/src/main/resources/sample-transform-config.yml b/thirdeye/thirdeye-hadoop/src/main/resources/sample-transform-config.yml
index ec26e2458e..564ea52d11 100644
--- a/thirdeye/thirdeye-hadoop/src/main/resources/sample-transform-config.yml
+++ b/thirdeye/thirdeye-hadoop/src/main/resources/sample-transform-config.yml
@@ -8,7 +8,7 @@ source1.transform.input.path=/hdfs/path/to/source1/avro/input/
 source2.transform.input.path=/hdfs/path/to/source2/avro/input/
 transform.output.path=/hdfs/path/to/output/
 
-transform.udf.class=com.linkedin.thirdeye.hadoop.transform.DefaultTransformUDF
-transform.config.udf.class=com.linkedin.thirdeye.hadoop.transform.DefaultTransformConfigUDF
+transform.udf.class=org.apache.pinot.thirdeye.hadoop.transform.DefaultTransformUDF
+transform.config.udf.class=org.apache.pinot.thirdeye.hadoop.transform.DefaultTransformConfigUDF
 
 transform.num.reducers=10
\ No newline at end of file
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseTest.java
deleted file mode 100644
index a8e1ce420f..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/aggregation/AggregationPhaseTest.java
+++ /dev/null
@@ -1,265 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.aggregation;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-import java.util.TreeMap;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.hadoop.io.AvroSerialization;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mrunit.mapreduce.MapDriver;
-import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
-import org.apache.hadoop.mrunit.testutil.TemporaryPath;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.collect.Lists;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.AggregationMapper;
-import com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.AggregationReducer;
-
-/**
- * This tests mapper of Aggregation phase, to check conversion of time column to bucket time
- * This also tests reducer to check aggregation using new time values
- */
-public class AggregationPhaseTest {
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-  private static final String HADOOP_IO_SERIALIZATION = "io.serializations";
-  private static final String AVRO_SCHEMA = "schema.avsc";
-
-  private String outputPath;
-  private Schema inputSchema;
-  private ThirdEyeConfig thirdeyeConfig;
-  private AggregationPhaseConfig aggPhaseConfig;
-  Properties props = new Properties();
-
-  private MapDriver<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> mapDriver;
-  private ReduceDriver<BytesWritable, BytesWritable, AvroKey<GenericRecord>, NullWritable> reduceDriver;
-
-  private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
-    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
-    String[] finalSerializations = new String[currentSerializations.length + 1];
-    System.arraycopy(currentSerializations, 0, finalSerializations, 0,
-        currentSerializations.length);
-    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
-    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);
-
-    AvroSerialization.addToConfiguration(conf);
-    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
-    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
-  }
-
-  private List<GenericRecord> generateTestMapperData() throws Exception {
-    List<GenericRecord> inputRecords = new ArrayList<GenericRecord>();
-
-    // 2016-04-27T190000
-    GenericRecord input = new GenericData.Record(inputSchema);
-    input.put("d1", "abc1");
-    input.put("d2", 501L);
-    input.put("d3", "xyz1");
-    input.put("hoursSinceEpoch", 1461808800000L);
-    input.put("m1", 100);
-    input.put("m2", 20);
-    inputRecords.add(input);
-
-    // 2016-04-27T191000
-    input = new GenericData.Record(inputSchema);
-    input.put("d1", "abc1");
-    input.put("d2", 501L);
-    input.put("d3", "xyz1");
-    input.put("hoursSinceEpoch", 1461809400000L);
-    input.put("m1", 100);
-    input.put("m2", 20);
-    inputRecords.add(input);
-
-    // 2016-04-27T20
-    input = new GenericData.Record(inputSchema);
-    input.put("d1", "abc2");
-    input.put("d2", 502L);
-    input.put("d3", "xyz2");
-    input.put("hoursSinceEpoch", 1461812400000L);
-    input.put("m1", 10);
-    input.put("m2", 2);
-    inputRecords.add(input);
-
-    return inputRecords;
-  }
-
-
-  private List<Pair<BytesWritable,List<BytesWritable>>> generateTestReduceData(List<Pair<BytesWritable, BytesWritable>> result) throws Exception {
-    List<Pair<BytesWritable, List<BytesWritable>>> inputRecords = new ArrayList<>();
-    Map<BytesWritable, List<BytesWritable>> inputMap = new TreeMap<>();
-
-    for (Pair<BytesWritable, BytesWritable> pair : result) {
-      inputMap.put(pair.getFirst(), new ArrayList<BytesWritable>());
-    }
-
-    for (Pair<BytesWritable, BytesWritable> pair : result) {
-      inputMap.get(pair.getFirst()).add(pair.getSecond());
-    }
-    for (Entry<BytesWritable, List<BytesWritable>> listPair : inputMap.entrySet()) {
-      inputRecords.add(new Pair<BytesWritable, List<BytesWritable>>(listPair.getKey(), listPair.getValue()));
-    }
-    return inputRecords;
-  }
-
-  @Before
-  public void setUp() throws Exception {
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "INT,INT");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_SIZE.toString(), "1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_TYPE.toString(), TimeUnit.HOURS.toString());
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_SIZE.toString(), "1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_TYPE.toString(), TimeUnit.MILLISECONDS.toString());
-    thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    aggPhaseConfig = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-
-    // Mapper config
-    AggregationMapper mapper = new AggregationMapper();
-    mapDriver = MapDriver.newMapDriver(mapper);
-    Configuration configuration = mapDriver.getConfiguration();
-    configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
-        + "org.apache.hadoop.io.serializer.WritableSerialization");
-
-    configuration.set(AggregationPhaseConstants.AGG_PHASE_THIRDEYE_CONFIG.toString(),
-        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    inputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-    setUpAvroSerialization(mapDriver.getConfiguration(), inputSchema);
-
-    // Reducer config
-    AggregationReducer reducer = new AggregationReducer();
-    reduceDriver = ReduceDriver.newReduceDriver(reducer);
-    configuration = reduceDriver.getConfiguration();
-    configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
-        + "org.apache.hadoop.io.serializer.WritableSerialization");
-
-    Schema reducerSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-    configuration.set(AggregationPhaseConstants.AGG_PHASE_AVRO_SCHEMA.toString(), reducerSchema.toString());
-
-    configuration.set(AggregationPhaseConstants.AGG_PHASE_THIRDEYE_CONFIG.toString(),
-        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    TemporaryPath tmpPath = new TemporaryPath();
-    outputPath = tmpPath.toString();
-    configuration.set(AggregationPhaseConstants.AGG_PHASE_OUTPUT_PATH.toString(), outputPath);
-    setUpAvroSerialization(reduceDriver.getConfiguration(), reducerSchema);
-
-  }
-
-  @Test
-  public void testAggregationPhase() throws Exception {
-
-    int recordCount = 0;
-    List<GenericRecord> inputRecords = generateTestMapperData();
-    for (GenericRecord record : inputRecords) {
-      AvroKey<GenericRecord> inKey = new AvroKey<GenericRecord>();
-      inKey.datum(record);
-      mapDriver.addInput(new Pair<AvroKey<GenericRecord>, NullWritable>(inKey, NullWritable.get()));
-      recordCount++;
-    }
-
-    List<Pair<BytesWritable, BytesWritable>> mapResult = mapDriver.run();
-    Assert.assertEquals("Incorrect number of records emitted by mapper", recordCount, mapResult.size());
-
-    AggregationPhaseMapOutputKey keyWrapper =
-        AggregationPhaseMapOutputKey.fromBytes(mapResult.get(0).getFirst().getBytes(), aggPhaseConfig.getDimensionTypes());
-    Assert.assertEquals(406058, keyWrapper.getTime());
-    keyWrapper = AggregationPhaseMapOutputKey.fromBytes(mapResult.get(1).getFirst().getBytes(), aggPhaseConfig.getDimensionTypes());
-    Assert.assertEquals(406058, keyWrapper.getTime());
-    keyWrapper = AggregationPhaseMapOutputKey.fromBytes(mapResult.get(2).getFirst().getBytes(), aggPhaseConfig.getDimensionTypes());
-    Assert.assertEquals(406059, keyWrapper.getTime());
-
-    List<Pair<BytesWritable, List<BytesWritable>>> reduceInput = generateTestReduceData(mapResult);
-    reduceDriver.addAll(reduceInput);
-
-    List<Pair<AvroKey<GenericRecord>, NullWritable>> reduceResult = reduceDriver.run();
-    Assert.assertEquals("Incorrect number of records returned by aggregation reducer", 2, reduceResult.size());
-
-    GenericRecord record = reduceResult.get(0).getFirst().datum();
-    List<Object> dimensionsExpected = Lists.newArrayList();
-    dimensionsExpected.add("abc1");
-    dimensionsExpected.add(501L);
-    dimensionsExpected.add("xyz1");
-    List<Object> dimensionsActual = getDimensionsFromRecord(record);
-    Assert.assertEquals(dimensionsExpected, dimensionsActual);
-    List<Integer> metricsExpected = Lists.newArrayList(200, 40);
-    List<Integer> metricsActual = getMetricsFromRecord(record);
-    Assert.assertEquals(metricsExpected, metricsActual);
-    Assert.assertEquals(406058, (long) record.get("hoursSinceEpoch"));
-
-
-    record = reduceResult.get(1).getFirst().datum();
-    dimensionsExpected = Lists.newArrayList();
-    dimensionsExpected.add("abc2");
-    dimensionsExpected.add(502L);
-    dimensionsExpected.add("xyz2");
-    dimensionsActual = getDimensionsFromRecord(record);
-    Assert.assertEquals(dimensionsExpected, dimensionsActual);
-    metricsExpected = Lists.newArrayList(10, 2);
-    metricsActual = getMetricsFromRecord(record);
-    Assert.assertEquals(metricsExpected, metricsActual);
-    Assert.assertEquals(406059, (long) record.get("hoursSinceEpoch"));
-  }
-
-  private List<Object> getDimensionsFromRecord(GenericRecord record) {
-    List<Object> dimensionsActual = new ArrayList<>();
-    dimensionsActual.add(record.get("d1"));
-    dimensionsActual.add(record.get("d2"));
-    dimensionsActual.add(record.get("d3"));
-    return dimensionsActual;
-  }
-
-  private List<Integer> getMetricsFromRecord(GenericRecord record) {
-    List<Integer> metricsActual = new ArrayList<>();
-    metricsActual.add((int) record.get("m1"));
-    metricsActual.add((int) record.get("m2"));
-    return metricsActual;
-  }
-
-
-  @After
-  public void cleanUp() throws IOException {
-
-    File f = new File(outputPath);
-    FileUtils.deleteDirectory(f);
-  }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigTest.java
deleted file mode 100644
index a7fa02a05f..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/config/ThirdEyeConfigTest.java
+++ /dev/null
@@ -1,249 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.config;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.concurrent.TimeUnit;
-
-import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.TopKDimensionToMetricsSpec;
-import com.linkedin.thirdeye.hadoop.config.TopkWhitelistSpec;
-
-public class ThirdEyeConfigTest {
-
-  private Properties props;
-  private ThirdEyeConfig thirdeyeConfig;
-  private ThirdEyeConfig config;
-
-  @BeforeClass
-  public void setup() {
-    props = new Properties();
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2,m3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "LONG,FLOAT,INT");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "t1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_TYPE.toString(), "DAYS");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_SIZE.toString(), "10");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_SPLIT_THRESHOLD.toString(), "1000");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_SPLIT_ORDER.toString(), "d1,d2,d3");
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString(), "m1,m3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), "0.02,0.1");
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), "d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d2", "m1,m2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d2", "20,30");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d3", "m1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d3", "50");
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), "d1,d2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d1", "x,y");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d2", "500");
-
-    thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-
-  }
-
-
-
-  @Test(expectedExceptions = IllegalArgumentException.class)
-  public void testTableNameConfig() throws IllegalArgumentException {
-    Assert.assertEquals("collection", thirdeyeConfig.getCollection(), "Collection name not correctly set");
-    try {
-      props.remove(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString());
-      config = ThirdEyeConfig.fromProperties(props);
-    } finally {
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
-    }
-  }
-
-  @Test(expectedExceptions = IllegalArgumentException.class)
-  public void testDimensionsConfig() throws IllegalArgumentException {
-    Assert.assertEquals(3, thirdeyeConfig.getDimensionNames().size(), "Incorrect number of dimensions");
-    Assert.assertEquals(new String[]{"d1", "d2", "d3"}, thirdeyeConfig.getDimensionNames().toArray(), "Incorrect dimensions");
-
-    try {
-      props.remove(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString());
-      config = ThirdEyeConfig.fromProperties(props);
-    } finally {
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
-    }
-  }
-
-  @Test
-  public void testMetricsConfig() throws IllegalArgumentException {
-    boolean failed = false;
-    Assert.assertEquals(3, thirdeyeConfig.getMetricNames().size(), "Incorrect number of metrics");
-    Assert.assertEquals(3, thirdeyeConfig.getMetrics().size(), "Incorrect number of metric specs");
-    Assert.assertEquals(new String[]{"m1", "m2", "m3"}, thirdeyeConfig.getMetricNames().toArray(), "Incorrect metrics");
-    MetricType[] actualMetricTypes = new MetricType[3];
-    for (int i = 0; i < 3; i++) {
-      actualMetricTypes[i] = thirdeyeConfig.getMetrics().get(i).getType();
-    }
-    Assert.assertEquals(actualMetricTypes, new MetricType[]{MetricType.LONG, MetricType.FLOAT, MetricType.INT}, "Incorrect metric specs");
-
-    try {
-      props.remove(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString());
-      config = ThirdEyeConfig.fromProperties(props);
-    } catch (IllegalArgumentException e) {
-      failed = true;
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2,m3");
-    }
-    Assert.assertTrue(failed, "Expected exception due to missing metric names property");
-
-    failed = false;
-    try {
-      props.remove(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString());
-      config = ThirdEyeConfig.fromProperties(props);
-    } catch (IllegalArgumentException e) {
-      failed = true;
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "LONG,FLOAT,INT");
-    }
-    Assert.assertTrue(failed, "Expected exception due to missing metric types property");
-
-    failed = false;
-    try {
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
-      config = ThirdEyeConfig.fromProperties(props);
-    } catch (IllegalStateException e) {
-      failed = true;
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2,m3");
-    }
-    Assert.assertTrue(failed, "Expecetd exception due to inequal number of metric names and types in properties");
-  }
-
-  @Test
-  public void testTimeConfig() throws IllegalArgumentException {
-    boolean failed = false;
-    Assert.assertEquals(thirdeyeConfig.getTime().getColumnName(), "t1", "Incorrect time column name");
-    Assert.assertNull(thirdeyeConfig.getInputTime(), "Incorrect input time column name");
-    Assert.assertEquals(thirdeyeConfig.getTime().getTimeGranularity().getSize(), 10, "Incorrect time size");
-    Assert.assertEquals(thirdeyeConfig.getTime().getTimeGranularity().getUnit(), TimeUnit.DAYS, "Incorrect time unit");
-
-    try {
-      props.remove(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString());
-      config = ThirdEyeConfig.fromProperties(props);
-    } catch (IllegalArgumentException e) {
-      failed = true;
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "t1");
-    }
-    Assert.assertTrue(failed, "Expected exception due to missing time column property");
-
-    props.remove(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_SIZE.toString());
-    props.remove(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_TYPE.toString());
-
-    config = ThirdEyeConfig.fromProperties(props);
-    Assert.assertEquals(config.getTime().getTimeGranularity().getSize(), 1, "Incorrect default time size");
-    Assert.assertEquals(config.getTime().getTimeGranularity().getUnit(), TimeUnit.HOURS, "Incorrect default time unit");
-  }
-
-  @Test
-  public void testSplitConfig() throws Exception {
-    Assert.assertEquals(thirdeyeConfig.getSplit().getThreshold(), 1000, "Incorrect split threshold");
-    Assert.assertEquals(thirdeyeConfig.getSplit().getOrder().toArray(), new String[]{"d1", "d2", "d3"}, "Incorrect split order");
-
-    props.remove(ThirdEyeConfigProperties.THIRDEYE_SPLIT_THRESHOLD.toString());
-    config = ThirdEyeConfig.fromProperties(props);
-    Assert.assertEquals(config.getSplit(), null, "Default split should be null");
-  }
-
-  @Test
-  public void testTopKWhitelistConfig() throws IllegalArgumentException {
-    boolean failed = false;
-    TopkWhitelistSpec topKWhitelistSpec = thirdeyeConfig.getTopKWhitelist();
-    // others values
-    Map<String, String> nonWhitelistValueMap = topKWhitelistSpec.getNonWhitelistValue();
-    Map<String, String> expectedNonWhitelistMap = new HashMap<>();
-    expectedNonWhitelistMap.put("d1", "other");
-    expectedNonWhitelistMap.put("d2","0");
-    Assert.assertEquals(nonWhitelistValueMap, expectedNonWhitelistMap);
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_NONWHITELIST_VALUE_DIMENSION.toString() + ".d1", "dummy");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_NONWHITELIST_VALUE_DIMENSION.toString() + ".d2", "-1");
-    config = ThirdEyeConfig.fromProperties(props);
-    topKWhitelistSpec = config.getTopKWhitelist();
-    nonWhitelistValueMap = topKWhitelistSpec.getNonWhitelistValue();
-    expectedNonWhitelistMap = new HashMap<>();
-    expectedNonWhitelistMap.put("d1", "dummy");
-    expectedNonWhitelistMap.put("d2", "-1");
-    Assert.assertEquals(nonWhitelistValueMap, expectedNonWhitelistMap);
-
-    // thresholds
-    Map<String, Double> threshold = topKWhitelistSpec.getThreshold();
-    Assert.assertEquals(threshold.size(), 2, "Incorrect metric thresholds size");
-    Assert.assertEquals(threshold.get("m1") == 0.02 && threshold.get("m3") == 0.1, true, "Incorrect metric thresholds config");
-    try {
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), "0.1");
-      config = ThirdEyeConfig.fromProperties(props);
-    } catch (IllegalStateException e) {
-      failed = true;
-    }
-    Assert.assertTrue(failed, "Expected exception due to unequal number of metrics and threshold");
-    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString());
-    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString());
-    config = ThirdEyeConfig.fromProperties(props);
-    Assert.assertEquals(config.getTopKWhitelist().getThreshold(), null, "Default threshold config should be null");
-
-    // whitelist
-    Map<String, List<String>> whitelist = topKWhitelistSpec.getWhitelist();
-    Assert.assertEquals(whitelist.size(), 2, "Incorrect size of whitelist dimensions");
-    List<String> expectedWhitelistValues = new ArrayList<>();
-    expectedWhitelistValues.add("x"); expectedWhitelistValues.add("y");
-    Assert.assertEquals(whitelist.get("d1"), expectedWhitelistValues, "Incorrect whitelist config");
-    expectedWhitelistValues = new ArrayList<>();
-    expectedWhitelistValues.add("500");
-    Assert.assertEquals(whitelist.get("d2"), expectedWhitelistValues, "Incorrect whitelist config");
-    props.remove(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString());
-    config = ThirdEyeConfig.fromProperties(props);
-    Assert.assertEquals(config.getTopKWhitelist().getWhitelist(), null, "Default whitelist config should be null");
-
-    // topk
-    List<TopKDimensionToMetricsSpec> topk = topKWhitelistSpec.getTopKDimensionToMetricsSpec();
-    Assert.assertEquals(topk.size(), 2, "Incorrect topk dimensions config size");
-    TopKDimensionToMetricsSpec topkSpec = topk.get(0);
-    Assert.assertEquals(topkSpec.getDimensionName().equals("d2")
-          && topkSpec.getTopk().size() == 2
-          && topkSpec.getTopk().get("m1") == 20
-          && topkSpec.getTopk().get("m2") == 30, true, "Incorrect topk config");
-    topkSpec = topk.get(1);
-    Assert.assertEquals(topkSpec.getDimensionName().equals("d3")
-        && topkSpec.getTopk().size() == 1
-        && topkSpec.getTopk().get("m1") == 50, true, "Incorrect topk config");
-    failed = false;
-    try {
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d3", "m1");
-      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d3", "50,50");
-      config = ThirdEyeConfig.fromProperties(props);
-    } catch (IllegalStateException e) {
-      failed = true;
-    }
-    Assert.assertTrue(failed, "Expecetd exception due to inequal number of metrics and kvalues for dimension");
-    props.remove(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString());
-    config = ThirdEyeConfig.fromProperties(props);
-    Assert.assertEquals(config.getTopKWhitelist(), null, "Default topk should be null");
-
-  }
-}
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnNoTransformationTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnNoTransformationTest.java
deleted file mode 100644
index b4d98c4c40..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnNoTransformationTest.java
+++ /dev/null
@@ -1,325 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-import java.io.DataInput;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Random;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-
-import junit.framework.Assert;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.hadoop.io.AvroSerialization;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mrunit.mapreduce.MapDriver;
-import org.apache.hadoop.mrunit.testutil.TemporaryPath;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.derivedcolumn.transformation.DerivedColumnTransformationPhaseConstants;
-import com.linkedin.thirdeye.hadoop.topk.TopKDimensionValues;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-/**
- * This test, tests the scenario of derived column transformation,
- * where the original schema and new schema are identical,
- * as there are no values in the topk file
- */
-
-public class DerivedColumnNoTransformationTest {
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-  private static final String HADOOP_IO_SERIALIZATION = "io.serializations";
-
-  private static final String AVRO_SCHEMA = "schema.avsc";
-  private static final String NO_TRANSFORMATION_SCHEMA = "no_transformation_schema.avsc";
-  private static final String TOPK_PATH = "topk_path";
-  private String outputPath;
-
-  Properties props = new Properties();
-
-  private MapDriver<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> mapDriver;
-
-  private long generateRandomHoursSinceEpoch() {
-    Random r = new Random();
-    // setting base value to year 2012
-    long unixtime = (long) (1293861599 + r.nextDouble() * 60 * 60 * 24 * 365);
-    return TimeUnit.SECONDS.toHours(unixtime);
-  }
-
-  private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
-    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
-    String[] finalSerializations = new String[currentSerializations.length + 1];
-    System.arraycopy(currentSerializations, 0, finalSerializations, 0,
-        currentSerializations.length);
-    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
-    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);
-
-    AvroSerialization.addToConfiguration(conf);
-    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
-    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
-
-  }
-
-  private void resetAvroSerialization() throws IOException {
-    Configuration conf = mapDriver.getConfiguration();
-    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
-        + "org.apache.hadoop.io.serializer.WritableSerialization");
-    Schema outputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(NO_TRANSFORMATION_SCHEMA));
-
-    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
-    String[] finalSerializations = new String[currentSerializations.length + 1];
-    System.arraycopy(currentSerializations, 0, finalSerializations, 0,
-        currentSerializations.length);
-    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
-    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);
-
-    AvroSerialization.addToConfiguration(conf);
-    AvroSerialization.setKeyWriterSchema(conf, outputSchema);
-    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
-
-  }
-
-  private List<GenericRecord> generateTestData() throws Exception {
-    Schema schema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-    List<GenericRecord> inputRecords = new ArrayList<GenericRecord>();
-
-    GenericRecord input = new GenericData.Record(schema);
-    input.put("d1", "abc1");
-    input.put("d2", 501L);
-    input.put("d3", "xyz1");
-    input.put("hoursSinceEpoch", generateRandomHoursSinceEpoch());
-    input.put("m1", 10);
-    input.put("m2", 20);
-    inputRecords.add(input);
-
-    input = new GenericData.Record(schema);
-    input.put("d1", "abc2");
-    input.put("d2", 502L);
-    input.put("d3", "xyz2");
-    input.put("hoursSinceEpoch", generateRandomHoursSinceEpoch());
-    input.put("m1", 10);
-    input.put("m2", 20);
-    inputRecords.add(input);
-
-    return inputRecords;
-  }
-
-  @Before
-  public void setUp() throws Exception {
-    DerivedColumnNoTransformationPhaseMapper mapper = new DerivedColumnNoTransformationPhaseMapper();
-    mapDriver = MapDriver.newMapDriver(mapper);
-    Configuration configuration = mapDriver.getConfiguration();
-    configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
-        + "org.apache.hadoop.io.serializer.WritableSerialization");
-
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "INT,INT");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");
-
-    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    configuration.set(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(),
-        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    Schema inputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-    setUpAvroSerialization(mapDriver.getConfiguration(), inputSchema);
-
-    Schema outputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(NO_TRANSFORMATION_SCHEMA));
-    configuration.set(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(),
-        outputSchema.toString());
-
-    configuration.set(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString(),
-        TOPK_PATH);
-
-    TemporaryPath tmpPath = new TemporaryPath();
-    outputPath = tmpPath.toString();
-    configuration.set(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH.toString(), outputPath);
-  }
-
-  @Test
-  public void testTopKColumnTransformationPhase() throws Exception {
-    int recordCount = 0;
-
-    List<GenericRecord> inputRecords = generateTestData();
-    for (GenericRecord record : inputRecords) {
-      AvroKey<GenericRecord> inKey = new AvroKey<GenericRecord>();
-      inKey.datum(record);
-      mapDriver.addInput(new Pair<AvroKey<GenericRecord>, NullWritable>(inKey, NullWritable.get()));
-      recordCount++;
-    }
-
-    resetAvroSerialization();
-    List<Pair<AvroKey<GenericRecord>, NullWritable>> result = mapDriver.run();
-    Assert.assertEquals(recordCount, result.size());
-
-    for (Pair<AvroKey<GenericRecord>, NullWritable> pair : result) {
-      GenericRecord datum = pair.getFirst().datum();
-      System.out.println(datum.getSchema().getFields().size());
-      Assert.assertEquals("Input records must contain same number of fields as output record, when schemas are not transformed",
-          datum.getSchema().getFields().size(), 6);
-    }
-  }
-
-  @After
-  public void cleanUp() throws IOException {
-
-    File f = new File(outputPath);
-    FileUtils.deleteDirectory(f);
-  }
-
-  public static class DerivedColumnNoTransformationPhaseMapper
-  extends Mapper<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> {
-
-    private Schema outputSchema;
-    private ThirdEyeConfig thirdeyeConfig;
-    private DerivedColumnTransformationPhaseConfig config;
-    private List<String> dimensionsNames;
-    private List<DimensionType> dimensionTypes;
-    private List<String> metricNames;
-    private TopKDimensionValues topKDimensionValues;
-    private Map<String, Set<String>> topKDimensionsMap;
-    private String timeColumnName;
-    private List<MetricType> metricTypes;
-    private Map<String, List<String>> whitelist;
-    private Map<String, String> nonWhitelistValueMap;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-      Configuration configuration = context.getConfiguration();
-      FileSystem fs = FileSystem.get(configuration);
-
-      thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-      config = DerivedColumnTransformationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-      dimensionsNames = config.getDimensionNames();
-      dimensionTypes = config.getDimensionTypes();
-      metricNames = config.getMetricNames();
-      metricTypes = config.getMetricTypes();
-      timeColumnName = config.getTimeColumnName();
-      whitelist = config.getWhitelist();
-      nonWhitelistValueMap = config.getNonWhitelistValue();
-
-      outputSchema = new Schema.Parser().parse(configuration.get(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString()));
-
-      Path topKPath = new Path(configuration.get(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString())
-          + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE);
-      topKDimensionValues = new TopKDimensionValues();
-      if (fs.exists(topKPath)) {
-        FSDataInputStream topkValuesStream = fs.open(topKPath);
-        topKDimensionValues = OBJECT_MAPPER.readValue((DataInput)topkValuesStream, TopKDimensionValues.class);
-        topkValuesStream.close();
-      }
-      topKDimensionsMap = topKDimensionValues.getTopKDimensions();
-    }
-
-
-    @Override
-    public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
-        throws IOException, InterruptedException {
-
-      // input record
-      GenericRecord inputRecord = key.datum();
-
-      // output record
-      GenericRecord outputRecord = new Record(outputSchema);
-
-      // dimensions
-      for (int i = 0; i < dimensionsNames.size(); i++) {
-        String dimensionName = dimensionsNames.get(i);
-        DimensionType dimensionType = dimensionTypes.get(i);
-        Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
-        String dimensionValueStr = String.valueOf(dimensionValue);
-
-        // add original dimension value with whitelist applied
-        Object whitelistDimensionValue = dimensionValue;
-        if (whitelist != null) {
-          List<String> whitelistDimensions = whitelist.get(dimensionName);
-          if (CollectionUtils.isNotEmpty(whitelistDimensions)) {
-            // whitelist config exists for this dimension but value not present in whitelist
-            if (!whitelistDimensions.contains(dimensionValueStr)) {
-              whitelistDimensionValue = dimensionType.getValueFromString(nonWhitelistValueMap.get(dimensionName));
-            }
-          }
-        }
-        outputRecord.put(dimensionName, whitelistDimensionValue);
-
-        // add column for topk, if topk config exists for that column
-        if (topKDimensionsMap.containsKey(dimensionName)) {
-          Set<String> topKDimensionValues = topKDimensionsMap.get(dimensionName);
-          // if topk config exists for that dimension
-          if (CollectionUtils.isNotEmpty(topKDimensionValues)) {
-            String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
-            Object topkDimensionValue = dimensionValue;
-            // topk config exists for this dimension, but value not present in topk
-            if (!topKDimensionValues.contains(dimensionValueStr) &&
-                (whitelist == null || whitelist.get(dimensionName) == null || !whitelist.get(dimensionName).contains(dimensionValueStr))) {
-              topkDimensionValue = ThirdEyeConstants.OTHER;
-            }
-            outputRecord.put(topkDimensionName, topkDimensionValue);
-          }
-        }
-      }
-
-
-      // metrics
-      for (int i = 0; i < metricNames.size(); i ++) {
-        String metricName = metricNames.get(i);
-        MetricType metricType = metricTypes.get(i);
-        outputRecord.put(metricName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metricName, metricType));
-      }
-
-      // time
-      outputRecord.put(timeColumnName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName));
-
-      AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
-      context.write(outputKey, NullWritable.get());
-    }
-
-    @Override
-    public void cleanup(Context context) throws IOException, InterruptedException {
-
-    }
-  }
-}
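
The dimension handling in the mapper removed above boils down to two substitution rules: a whitelist rule (a value outside a dimension's whitelist is replaced by the configured non-whitelist value) and a topk rule (a value outside the dimension's top-k set, unless whitelisted, is written to the extra _topk column as "other"). A minimal standalone sketch of those two rules, using hypothetical names rather than the ThirdEye API:

    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    // Illustrative only: condenses the per-dimension whitelist/topk decisions.
    class DimensionSubstitutionSketch {
      static final String OTHER = "other";

      // Whitelist rule: substitute the configured non-whitelist value when a
      // whitelist exists for the dimension and the value is not on it.
      static String applyWhitelist(String dim, String value,
          Map<String, List<String>> whitelist, Map<String, String> nonWhitelistValue) {
        List<String> allowed = (whitelist == null) ? null : whitelist.get(dim);
        if (allowed != null && !allowed.isEmpty() && !allowed.contains(value)) {
          return nonWhitelistValue.get(dim);
        }
        return value;
      }

      // Topk rule: the "<dim>_topk" column gets "other" when a top-k set exists
      // for the dimension and the value is neither in it nor whitelisted.
      static String applyTopK(String dim, String value,
          Map<String, Set<String>> topK, Map<String, List<String>> whitelist) {
        Set<String> top = topK.get(dim);
        boolean whitelisted = whitelist != null && whitelist.get(dim) != null
            && whitelist.get(dim).contains(value);
        if (top != null && !top.isEmpty() && !top.contains(value) && !whitelisted) {
          return OTHER;
        }
        return value;
      }
    }
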
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationTest.java
deleted file mode 100644
index d65fe83572..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedColumnTransformationTest.java
+++ /dev/null
@@ -1,323 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-import java.io.DataInput;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Random;
-import java.util.Set;
-import java.util.concurrent.TimeUnit;
-
-import junit.framework.Assert;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericData.Record;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.hadoop.io.AvroSerialization;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.commons.collections.CollectionUtils;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mrunit.mapreduce.MapDriver;
-import org.apache.hadoop.mrunit.testutil.TemporaryPath;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.linkedin.thirdeye.hadoop.config.DimensionType;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.topk.TopKDimensionValues;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAvroUtils;
-
-/**
- * This test verifies the mapper of the DerivedColumnTransformation phase,
- * checking that new columns are added according to the topk values file
- */
-public class DerivedColumnTransformationTest {
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-  private static final String HADOOP_IO_SERIALIZATION = "io.serializations";
-
-  private static final String AVRO_SCHEMA = "schema.avsc";
-  private static final String TRANSFORMATION_SCHEMA = "transformation_schema.avsc";
-  private static final String TOPK_PATH = "topk_path";
-  private String outputPath;
-
-  Properties props = new Properties();
-
-  private MapDriver<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> mapDriver;
-
-  private long generateRandomHoursSinceEpoch() {
-    Random r = new Random();
-    // setting base value to year 2011
-    long unixtime = (long) (1293861599 + r.nextDouble() * 60 * 60 * 24 * 365);
-    return TimeUnit.SECONDS.toHours(unixtime);
-  }
-
-
-  private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
-    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
-    String[] finalSerializations = new String[currentSerializations.length + 1];
-    System.arraycopy(currentSerializations, 0, finalSerializations, 0,
-        currentSerializations.length);
-    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
-    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);
-
-    AvroSerialization.addToConfiguration(conf);
-    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
-    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
-  }
-
-
-  private void resetAvroSerialization() throws IOException {
-    Configuration conf = mapDriver.getConfiguration();
-    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
-        + "org.apache.hadoop.io.serializer.WritableSerialization");
-    Schema outputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(TRANSFORMATION_SCHEMA));
-
-    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
-    String[] finalSerializations = new String[currentSerializations.length + 1];
-    System.arraycopy(currentSerializations, 0, finalSerializations, 0,
-        currentSerializations.length);
-    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
-    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);
-
-    AvroSerialization.addToConfiguration(conf);
-    AvroSerialization.setKeyWriterSchema(conf, outputSchema);
-    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
-
-  }
-
-  private List<GenericRecord> generateTestData() throws Exception {
-    Schema schema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-    List<GenericRecord> inputRecords = new ArrayList<GenericRecord>();
-
-    GenericRecord input = new GenericData.Record(schema);
-    input.put("d1", "abc1");
-    input.put("d2", 501L);
-    input.put("d3", "xyz1");
-    input.put("hoursSinceEpoch", generateRandomHoursSinceEpoch());
-    input.put("m1", 10);
-    input.put("m2", 20);
-    inputRecords.add(input);
-
-    input = new GenericData.Record(schema);
-    input.put("d1", "abc2");
-    input.put("d2", 502L);
-    input.put("d3", "xyz2");
-    input.put("hoursSinceEpoch", generateRandomHoursSinceEpoch());
-    input.put("m1", 10);
-    input.put("m2", 20);
-    inputRecords.add(input);
-
-    return inputRecords;
-  }
-
-  @Before
-  public void setUp() throws Exception {
-    DerivedColumnTransformationPhaseMapper mapper = new DerivedColumnTransformationPhaseMapper();
-    mapDriver = MapDriver.newMapDriver(mapper);
-    Configuration configuration = mapDriver.getConfiguration();
-    configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
-        + "org.apache.hadoop.io.serializer.WritableSerialization");
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "INT,INT");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), "d2,");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d2", "m1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d2", "1");
-
-    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    configuration.set(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString(),
-        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    Schema inputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-    setUpAvroSerialization(mapDriver.getConfiguration(), inputSchema);
-
-    Schema outputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(TRANSFORMATION_SCHEMA));
-    configuration.set(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString(),
-        outputSchema.toString());
-
-    configuration.set(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString(),
-        ClassLoader.getSystemResource(TOPK_PATH).toString());
-
-    TemporaryPath tmpPath = new TemporaryPath();
-    outputPath = tmpPath.toString();
-    configuration.set(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_PATH.toString(), outputPath);
-
-  }
-
-  @Test
-  public void testTopKColumnTransformationPhase() throws Exception {
-    int recordCount = 0;
-
-    List<GenericRecord> inputRecords = generateTestData();
-    for (GenericRecord record : inputRecords) {
-      AvroKey<GenericRecord> inKey = new AvroKey<GenericRecord>();
-      inKey.datum(record);
-      mapDriver.addInput(new Pair<AvroKey<GenericRecord>, NullWritable>(inKey, NullWritable.get()));
-      recordCount++;
-    }
-
-    resetAvroSerialization();
-    List<Pair<AvroKey<GenericRecord>, NullWritable>> result = mapDriver.run();
-    Assert.assertEquals(recordCount, result.size());
-
-    for (Pair<AvroKey<GenericRecord>, NullWritable> pair : result) {
-      GenericRecord datum = pair.getFirst().datum();
-      Assert.assertEquals("TopKTransformationJob did not add new column for topk column",
-          datum.getSchema().getField("d2_topk") != null, true);
-      Long d2 =  (Long) datum.get("d2");
-      String d2_topk =  (String) datum.get("d2_topk");
-      Assert.assertEquals("Incorrect topk column transformation", (d2_topk.equals("other") && d2 == 501l) || (d2_topk.equals("502") && d2 == 502l), true);
-    }
-  }
-
-  @After
-  public void cleanUp() throws IOException {
-
-    File f = new File(outputPath);
-    FileUtils.deleteDirectory(f);
-  }
-
-  public static class DerivedColumnTransformationPhaseMapper
-  extends Mapper<AvroKey<GenericRecord>, NullWritable, AvroKey<GenericRecord>, NullWritable> {
-
-    private Schema outputSchema;
-    private ThirdEyeConfig thirdeyeConfig;
-    private DerivedColumnTransformationPhaseConfig config;
-    private List<String> dimensionsNames;
-    private List<DimensionType> dimensionTypes;
-    private Map<String, String> nonWhitelistValueMap;
-    private List<String> metricNames;
-    private TopKDimensionValues topKDimensionValues;
-    private Map<String, Set<String>> topKDimensionsMap;
-    private String timeColumnName;
-    private Map<String, List<String>> whitelist;
-
-    @Override
-    public void setup(Context context) throws IOException, InterruptedException {
-      Configuration configuration = context.getConfiguration();
-      FileSystem fs = FileSystem.get(configuration);
-
-      thirdeyeConfig = OBJECT_MAPPER.readValue(configuration.get(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
-      config = DerivedColumnTransformationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
-      dimensionsNames = config.getDimensionNames();
-      dimensionTypes = config.getDimensionTypes();
-      nonWhitelistValueMap = config.getNonWhitelistValue();
-      metricNames = config.getMetricNames();
-      timeColumnName = config.getTimeColumnName();
-      whitelist = config.getWhitelist();
-
-      outputSchema = new Schema.Parser().parse(configuration.get(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_OUTPUT_SCHEMA.toString()));
-
-      Path topKPath = new Path(configuration.get(DerivedColumnTransformationPhaseConstants.DERIVED_COLUMN_TRANSFORMATION_PHASE_TOPK_PATH.toString())
-          + File.separator + ThirdEyeConstants.TOPK_VALUES_FILE);
-      topKDimensionValues = new TopKDimensionValues();
-      if (fs.exists(topKPath)) {
-        FSDataInputStream topkValuesStream = fs.open(topKPath);
-        topKDimensionValues = OBJECT_MAPPER.readValue((DataInput)topkValuesStream, TopKDimensionValues.class);
-        topkValuesStream.close();
-      }
-      topKDimensionsMap = topKDimensionValues.getTopKDimensions();
-    }
-
-
-    @Override
-    public void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
-        throws IOException, InterruptedException {
-
-      // input record
-      GenericRecord inputRecord = key.datum();
-
-      // output record
-      GenericRecord outputRecord = new Record(outputSchema);
-
-      // dimensions
-      for (int i = 0; i < dimensionsNames.size(); i++) {
-        String dimensionName = dimensionsNames.get(i);
-        DimensionType dimensionType = dimensionTypes.get(i);
-        Object dimensionValue = ThirdeyeAvroUtils.getDimensionFromRecord(inputRecord, dimensionName);
-        String dimensionValueStr = String.valueOf(dimensionValue);
-
-        // add original dimension value with whitelist applied
-        Object whitelistDimensionValue = dimensionValue;
-        if (whitelist != null) {
-          List<String> whitelistDimensions = whitelist.get(dimensionName);
-          if (CollectionUtils.isNotEmpty(whitelistDimensions)) {
-            // whitelist config exists for this dimension but value not present in whitelist
-            if (!whitelistDimensions.contains(dimensionValueStr)) {
-              whitelistDimensionValue = dimensionType.getValueFromString(nonWhitelistValueMap.get(dimensionName));
-            }
-          }
-        }
-        outputRecord.put(dimensionName, whitelistDimensionValue);
-
-        // add column for topk, if topk config exists for that column
-        if (topKDimensionsMap.containsKey(dimensionName)) {
-          Set<String> topKDimensionValues = topKDimensionsMap.get(dimensionName);
-          // if topk config exists for that dimension
-          if (CollectionUtils.isNotEmpty(topKDimensionValues)) {
-            String topkDimensionName = dimensionName + ThirdEyeConstants.TOPK_DIMENSION_SUFFIX;
-            Object topkDimensionValue = dimensionValue;
-            // topk config exists for this dimension, but value not present in topk
-            if (!topKDimensionValues.contains(dimensionValueStr) &&
-                (whitelist == null || whitelist.get(dimensionName) == null || !whitelist.get(dimensionName).contains(dimensionValueStr))) {
-              topkDimensionValue = ThirdEyeConstants.OTHER;
-            }
-            outputRecord.put(topkDimensionName, String.valueOf(topkDimensionValue));
-          }
-        }
-      }
-
-
-      // metrics
-      for (String metric : metricNames) {
-        outputRecord.put(metric, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, metric));
-      }
-
-      // time
-      outputRecord.put(timeColumnName, ThirdeyeAvroUtils.getMetricFromRecord(inputRecord, timeColumnName));
-
-      AvroKey<GenericRecord> outputKey = new AvroKey<GenericRecord>(outputRecord);
-      context.write(outputKey, NullWritable.get());
-    }
-
-    @Override
-    public void cleanup(Context context) throws IOException, InterruptedException {
-
-    }
-  }
-}
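
To read the final assertion of testTopKColumnTransformationPhase concretely: each output record must satisfy exactly one of the two branches, which implies the checked-in topk_path fixture lists 502 as the only top-k value for d2 (k = 1 on m1). The record with d2 = 501 is therefore rewritten to d2_topk = "other", while the record with d2 = 502 keeps d2_topk = "502".
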
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedSchemaGenerationTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedSchemaGenerationTest.java
deleted file mode 100644
index 6d8219b21f..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/derivedcolumn/transformation/DerivedSchemaGenerationTest.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.derivedcolumn.transformation;
-
-import java.io.IOException;
-import java.util.Properties;
-
-import org.apache.avro.Schema;
-import org.testng.Assert;
-import org.testng.annotations.BeforeTest;
-import org.testng.annotations.Test;
-
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-
-public class DerivedSchemaGenerationTest {
-  private static final String AVRO_SCHEMA = "schema.avsc";
-
-  DerivedColumnTransformationPhaseJob job = new DerivedColumnTransformationPhaseJob("derived_column_transformation", null);
-  Schema inputSchema;
-  ThirdEyeConfig thirdeyeConfig;
-  Properties props;
-
-  @BeforeTest
-  public void setup() throws IOException {
-    inputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-
-    props = new Properties();
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "INT,INT");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");
-
-  }
-
-  @Test
-  public void testDerivedColumnsSchemaGeneration() throws Exception{
-    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    Schema outputSchema = job.newSchema(thirdeyeConfig);
-    Assert.assertEquals(inputSchema.getFields().size(), outputSchema.getFields().size(),
-        "Input schema should be same as output schema if no topk/whitelist in config");
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), "d2,");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d2", "m1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d2", "1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), "d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d2" , "10,20,30");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d3", "x,y");
-
-    thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-    outputSchema = job.newSchema(thirdeyeConfig);
-    Assert.assertEquals(inputSchema.getFields().size() + 1, outputSchema.getFields().size(),
-        "Input schema should not be same as output schema if topk/whitelist in config");
-
-    Assert.assertEquals(outputSchema.getField("d2_topk") != null, true,
-        "Output schema should have _topk entries for columsn in topk");
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIsTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIsTest.java
deleted file mode 100644
index 5f29933087..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/push/SegmentPushControllerAPIsTest.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.push;
-
-import java.util.List;
-
-import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
-import com.google.common.collect.Lists;
-
-public class SegmentPushControllerAPIsTest {
-
-  private String[] controllerHosts = null;
-  private String controllerPort = "0";
-  private String testTable1 = "testTable";
-  private String testTable2 = "test_table";
-  SegmentPushControllerAPIs segmentPushControllerAPIs;
-
-  @BeforeClass
-  public void setup() {
-    segmentPushControllerAPIs = new SegmentPushControllerAPIs(controllerHosts, controllerPort);
-  }
-
-  @Test
-  public void testOverlapPattern() throws Exception {
-
-    String segmentName = testTable1 + "_DAILY_2016-04-28-000000_2016-04-29-000000";
-    String overlapPattern = segmentPushControllerAPIs.getOverlapPattern(segmentName, testTable1);
-    Assert.assertEquals(overlapPattern, testTable1 + "_HOURLY_2016-04-28", "Incorrect overlap pattern for segment " + segmentName);
-
-    segmentName = testTable2 + "_DAILY_2016-04-28-000000_2016-04-29-000000";
-    overlapPattern = segmentPushControllerAPIs.getOverlapPattern(segmentName, testTable2);
-    Assert.assertEquals(overlapPattern, testTable2 + "_HOURLY_2016-04-28", "Incorrect overlap pattern for segment " + segmentName);
-  }
-
-  @Test
-  public void testGetOverlappingSegments() throws Exception {
-    List<String> allSegments = Lists.newArrayList(
-        "test_HOURLY_2016-04-28-000000_2016-04-28-010000",
-        "test_HOURLY_2016-04-28-230000_2016-04-29-000000",
-        "test_DAILY_2016-04-28-000000_2016-04-29-000000");
-    String pattern = "test_HOURLY_2016-04-28";
-    List<String> overlappingSegments = segmentPushControllerAPIs.getOverlappingSegments(allSegments, pattern);
-    allSegments.remove(2);
-    Assert.assertEquals(overlappingSegments, allSegments, "Incorrect overlapping segments returned");
-  }
-
-}
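
The two assertions above pin down the shape of the overlap pattern: the table name plus "_HOURLY_" plus the date portion of the DAILY segment's start timestamp. A standalone sketch of that derivation (illustrative only; the actual SegmentPushControllerAPIs implementation may differ):

    public class OverlapPatternSketch {
      // Derive "<table>_HOURLY_<start date>" from "<table>_DAILY_<start>_<end>".
      static String overlapPattern(String segmentName, String tableName) {
        String timestamps = segmentName.substring((tableName + "_DAILY_").length());
        String startDate = timestamps.split("_")[0].substring(0, "yyyy-MM-dd".length());
        return tableName + "_HOURLY_" + startDate;
      }

      public static void main(String[] args) {
        // Prints testTable_HOURLY_2016-04-28, matching the expected pattern above.
        System.out.println(overlapPattern(
            "testTable_DAILY_2016-04-28-000000_2016-04-29-000000", "testTable"));
      }
    }
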
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPairTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPairTest.java
deleted file mode 100644
index 9c8c844052..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/topk/DimensionValueMetricPairTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import com.google.common.collect.MinMaxPriorityQueue;
-
-public class DimensionValueMetricPairTest {
-
-  @Test
-  public void comparatorTest() throws Exception {
-
-    MinMaxPriorityQueue<DimensionValueMetricPair> testQueue = MinMaxPriorityQueue.maximumSize(2).create();
-
-    DimensionValueMetricPair d1 = new DimensionValueMetricPair("d1", 1);
-    DimensionValueMetricPair d2 = new DimensionValueMetricPair("d2", 2);
-    DimensionValueMetricPair d3 = new DimensionValueMetricPair(30, 3);
-    DimensionValueMetricPair d4 = new DimensionValueMetricPair("d4", 4);
-
-    testQueue.add(d1);
-    testQueue.add(d2);
-    testQueue.add(d3);
-    testQueue.add(d4);
-
-    for (DimensionValueMetricPair pair : testQueue) {
-      Assert.assertEquals(pair.getMetricValue().intValue() > 2, true,
-          "Incorrect comparator for DimensionValueMetricPair, queue must retain highest metric values");
-    }
-
-  }
-}
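
What the comparator test above relies on: a size-bounded Guava MinMaxPriorityQueue evicts its greatest element (according to its comparator) once it exceeds the maximum size, so retaining the pairs with the highest metric values requires a comparator that ranks higher metrics as "smaller". A small self-contained sketch of that behavior, using plain integers instead of DimensionValueMetricPair:

    import java.util.Comparator;
    import com.google.common.collect.MinMaxPriorityQueue;

    public class TopKQueueSketch {
      public static void main(String[] args) {
        // Reversed ordering: the largest value is the "least" element, so the
        // queue evicts small values when it overflows its maximum size of 2.
        MinMaxPriorityQueue<Integer> topMetrics = MinMaxPriorityQueue
            .orderedBy(Comparator.<Integer>reverseOrder())
            .maximumSize(2)
            .create();
        for (int metric : new int[] {1, 2, 3, 4}) {
          topMetrics.add(metric);
        }
        System.out.println(topMetrics); // keeps the two largest values, 4 and 3
      }
    }
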
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/topk/TopkPhaseTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/topk/TopkPhaseTest.java
deleted file mode 100644
index 28c6383059..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/topk/TopkPhaseTest.java
+++ /dev/null
@@ -1,238 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.topk;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-import java.util.Random;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.hadoop.io.AvroSerialization;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mrunit.mapreduce.MapDriver;
-import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
-import org.apache.hadoop.mrunit.testutil.TemporaryPath;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.collect.Lists;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConstants;
-import com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob.TopKPhaseMapper;
-import com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob.TopKPhaseReducer;
-
-/**
- * This test verifies the mapper of the Topk phase,
- * ensuring that the right pairs are emitted.
- * It also verifies the topk file that is generated.
- */
-public class TopkPhaseTest {
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-
-  private static final String HADOOP_IO_SERIALIZATION = "io.serializations";
-  private static final String AVRO_SCHEMA = "schema.avsc";
-
-  private String outputPath;
-  private Schema inputSchema;
-  private ThirdEyeConfig thirdeyeConfig;
-  Properties props = new Properties();
-
-  private MapDriver<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> mapDriver;
-  private ReduceDriver<BytesWritable, BytesWritable, NullWritable, NullWritable> reduceDriver;
-
-  private long generateRandomHoursSinceEpoch() {
-    Random r = new Random();
-    // setting base value to year 2011
-    long unixtime = (long) (1293861599 + r.nextDouble() * 60 * 60 * 24 * 365);
-    return TimeUnit.SECONDS.toHours(unixtime);
-  }
-
-
-  private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
-    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
-    String[] finalSerializations = new String[currentSerializations.length + 1];
-    System.arraycopy(currentSerializations, 0, finalSerializations, 0,
-        currentSerializations.length);
-    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
-    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);
-
-    AvroSerialization.addToConfiguration(conf);
-    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
-    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
-  }
-
-  private List<GenericRecord> generateTestMapperData() throws Exception {
-    List<GenericRecord> inputRecords = new ArrayList<GenericRecord>();
-
-    GenericRecord input = new GenericData.Record(inputSchema);
-    input.put("d1", "abc1");
-    input.put("d2", 501L);
-    input.put("d3", "xyz1");
-    input.put("hoursSinceEpoch", generateRandomHoursSinceEpoch());
-    input.put("m1", 100);
-    input.put("m2", 20);
-    inputRecords.add(input);
-
-    input = new GenericData.Record(inputSchema);
-    input.put("d1", "abc2");
-    input.put("d2", 502L);
-    input.put("d3", "xyz2");
-    input.put("hoursSinceEpoch", generateRandomHoursSinceEpoch());
-    input.put("m1", 10);
-    input.put("m2", 20);
-    inputRecords.add(input);
-
-    return inputRecords;
-  }
-
-
-  private List<Pair<BytesWritable,List<BytesWritable>>> generateTestReduceData(List<Pair<BytesWritable, BytesWritable>> result) throws Exception {
-    List<Pair<BytesWritable, List<BytesWritable>>> inputRecords = new ArrayList<>();
-    Map<BytesWritable, List<BytesWritable>> inputMap = new TreeMap<>();
-
-    for (Pair<BytesWritable, BytesWritable> pair : result) {
-      inputMap.put(pair.getFirst(), Lists.newArrayList(pair.getSecond()));
-    }
-    for (Entry<BytesWritable, List<BytesWritable>> listPair : inputMap.entrySet()) {
-      inputRecords.add(new Pair<BytesWritable, List<BytesWritable>>(listPair.getKey(), listPair.getValue()));
-    }
-    return inputRecords;
-  }
-
-  @Before
-  public void setUp() throws Exception {
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "INT,INT");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), "d2,");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d2", "m1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d2", "1");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), "d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d3", "xyz2");
-    thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-
-    // Mapper config
-    TopKPhaseMapper mapper = new TopKPhaseMapper();
-    mapDriver = MapDriver.newMapDriver(mapper);
-    Configuration configuration = mapDriver.getConfiguration();
-    configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
-        + "org.apache.hadoop.io.serializer.WritableSerialization");
-
-    configuration.set(TopKPhaseConstants.TOPK_PHASE_THIRDEYE_CONFIG.toString(),
-        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    inputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-    setUpAvroSerialization(mapDriver.getConfiguration(), inputSchema);
-
-    // Reducer config
-    TopKPhaseReducer reducer = new TopKPhaseReducer();
-    reduceDriver = ReduceDriver.newReduceDriver(reducer);
-    configuration = reduceDriver.getConfiguration();
-    configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
-        + "org.apache.hadoop.io.serializer.WritableSerialization");
-
-    configuration.set(TopKPhaseConstants.TOPK_PHASE_THIRDEYE_CONFIG.toString(),
-        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
-
-    TemporaryPath tmpPath = new TemporaryPath();
-    outputPath = tmpPath.toString();
-    configuration.set(TopKPhaseConstants.TOPK_PHASE_OUTPUT_PATH.toString(), outputPath);
-
-  }
-
-  @Test
-  public void testTopKColumnTransformationPhase() throws Exception {
-
-    int recordCount = 0;
-    List<GenericRecord> inputRecords = generateTestMapperData();
-    for (GenericRecord record : inputRecords) {
-      AvroKey<GenericRecord> inKey = new AvroKey<GenericRecord>();
-      inKey.datum(record);
-      mapDriver.addInput(new Pair<AvroKey<GenericRecord>, NullWritable>(inKey, NullWritable.get()));
-      recordCount++;
-    }
-
-    List<Pair<BytesWritable, BytesWritable>> result = mapDriver.run();
-    // for each record, we emit
-    // one record per dimension
-    // and one record for ALL,ALL
-    Assert.assertEquals("Incorrect number of records emitted by mapper", recordCount * (3 + 1), result.size());
-
-    Map<String, Integer> counts = new HashMap<>();
-    for (Pair<BytesWritable, BytesWritable> pair : result) {
-      TopKPhaseMapOutputKey key = TopKPhaseMapOutputKey.fromBytes(pair.getFirst().getBytes());
-      String dimensionName = key.getDimensionName();
-      Integer count = counts.get(dimensionName);
-      if (count == null) {
-        count = 0;
-      }
-      counts.put(dimensionName , count + 1);
-    }
-    Assert.assertEquals("Incorrect number of records emitted from map", 2, (int) counts.get("d1"));
-    Assert.assertEquals("Incorrect number of records emitted from map", 2, (int) counts.get("d2"));
-    Assert.assertEquals("Incorrect number of records emitted from map", 2, (int) counts.get("d3"));
-    Assert.assertEquals("Incorrect number of records emitted from map", 2, (int) counts.get("0"));
-
-    List<Pair<BytesWritable, List<BytesWritable>>> reduceInput = generateTestReduceData(result);
-    reduceDriver.addAll(reduceInput);
-    reduceDriver.run();
-
-    File topKFile = new File(outputPath, ThirdEyeConstants.TOPK_VALUES_FILE);
-    Assert.assertTrue("Topk file failed to generate!", topKFile.exists());
-    TopKDimensionValues topk = OBJECT_MAPPER.readValue(new FileInputStream(topKFile), TopKDimensionValues.class);
-    Map<String, Set<String>> topkMap = topk.getTopKDimensions();
-    Assert.assertEquals("Incorrect topk object", topkMap.size(), 1);
-    Set<String> expected = new HashSet<>();
-    expected.add("501");
-    Assert.assertEquals("Incorrect topk values in topk object", expected, topkMap.get("d2"));
-    Assert.assertEquals("Incorrect whitelist values in topk object", null, topkMap.get("d3"));
-  }
-
-
-
-  @After
-  public void cleanUp() throws IOException {
-
-    File f = new File(outputPath);
-    FileUtils.deleteDirectory(f);
-  }
-}
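
The counts asserted above follow from the contract in the mapper comment: each input record emits one pair per dimension plus one roll-up pair, so 2 records x (3 dimensions + 1) = 8 map outputs, i.e. 2 per dimension name and 2 for the roll-up key (which surfaces as "0"). On the reduce side, the top-k file keeps only the single (k = 1) d2 value with the largest m1, which is "501" (m1 = 100) rather than "502" (m1 = 10), and d3, which has only a whitelist configured, does not appear in the top-k map at all.
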
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtilsTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtilsTest.java
deleted file mode 100644
index a3108ce9fa..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAggregateMetricUtilsTest.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.util;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.testng.Assert;
-import org.testng.annotations.BeforeTest;
-import org.testng.annotations.Test;
-
-import com.linkedin.thirdeye.hadoop.config.MetricType;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyeAggregateMetricUtils;
-
-public class ThirdeyeAggregateMetricUtilsTest {
-  List<MetricType> metricTypes;
-  Number[] aggMetricValues;
-  Number[] metricValues;
-  Number[] expectedValues;
-
-  @BeforeTest
-  public void setup() {
-    metricTypes = new ArrayList<>();
-    metricTypes.add(MetricType.INT);
-    metricTypes.add(MetricType.FLOAT);
-    metricTypes.add(MetricType.LONG);
-
-    aggMetricValues = new Number[3];
-    Arrays.fill(aggMetricValues, 0);
-    metricValues = new Number[3];
-    Arrays.fill(metricValues, 0);
-    expectedValues = new Number[3];
-    Arrays.fill(expectedValues, 0);
-
-  }
-
-  @Test
-  public void testAggregateMetricUtils() throws Exception {
-
-    ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
-    expectedValues[0] = 0;
-    expectedValues[1] = 0.0f;
-    expectedValues[2] = 0L;
-    Assert.assertEquals(expectedValues, aggMetricValues);
-
-
-    metricValues[0] = 10;
-    metricValues[1] = 2.5f;
-    metricValues[2] = 111111111111L;
-    ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
-    expectedValues[0] = 10;
-    expectedValues[1] = 2.5f;
-    expectedValues[2] = 111111111111L;
-    Assert.assertEquals(expectedValues, aggMetricValues);
-
-    metricValues[0] = 10;
-    metricValues[1] = 2.5f;
-    metricValues[2] = 111111111111L;
-    ThirdeyeAggregateMetricUtils.aggregate(metricTypes, aggMetricValues, metricValues);
-    expectedValues[0] = 20;
-    expectedValues[1] = 5.0f;
-    expectedValues[2] = 222222222222L;
-    Assert.assertEquals(expectedValues, aggMetricValues);
-
-
-
-
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAvroUtilsTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAvroUtilsTest.java
deleted file mode 100644
index 036645440c..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyeAvroUtilsTest.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.thirdeye.hadoop.util;
-
-import java.util.concurrent.TimeUnit;
-
-import org.apache.avro.Schema;
-import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
-import com.linkedin.pinot.common.data.DimensionFieldSpec;
-import com.linkedin.pinot.common.data.FieldSpec;
-import com.linkedin.pinot.common.data.FieldSpec.DataType;
-import com.linkedin.pinot.common.data.MetricFieldSpec;
-import com.linkedin.pinot.common.data.TimeFieldSpec;
-import com.linkedin.pinot.common.data.TimeGranularitySpec;
-
-public class ThirdeyeAvroUtilsTest {
-
-  public Schema avroSchema;
-
-  private static final String AVRO_SCHEMA = "schema.avsc";
-
-  @BeforeClass
-  public void setup() throws Exception {
-    avroSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
-  }
-
-  @Test
-  public void testGetDimensionTypes() throws Exception{
-    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty("d1,d2,d3", avroSchema);
-    Assert.assertEquals(dimensionTypesProperty, "STRING,LONG,STRING", "Dimension property not extracted correctly");
-  }
-
-  @Test
-  public void testGetDimensionTypesEmpty() throws Exception{
-    String dimensionTypesProperty = ThirdeyeAvroUtils.getDimensionTypesProperty("", avroSchema);
-    Assert.assertEquals(dimensionTypesProperty, "", "Dimension property not extracted correctly");
-  }
-
-  @Test
-  public void testGetMetricTypes() throws Exception{
-    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty("m1,m2", null, avroSchema);
-    Assert.assertEquals(metricTypesProperty, "INT,INT", "Metric property not extracted correctly");
-
-    metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty("m1,m2", "INT,LONG", avroSchema);
-    Assert.assertEquals(metricTypesProperty, "INT,LONG", "Metric property not extracted correctly");
-  }
-
-  @Test
-  public void testGetDataTypeForField() throws Exception {
-    String type = ThirdeyeAvroUtils.getDataTypeForField("d1", avroSchema);
-    Assert.assertEquals(type, "STRING", "Data type not extracted correctly for d1");
-    type = ThirdeyeAvroUtils.getDataTypeForField("hoursSinceEpoch", avroSchema);
-    Assert.assertEquals(type, "LONG", "Data type not extracted correctly for hoursSinceEpoch");
-    type = ThirdeyeAvroUtils.getDataTypeForField("m1", avroSchema);
-    Assert.assertEquals(type, "INT", "Data type not extracted correctly for m1");
-  }
-
-  @Test
-  public void testConstructAvroSchemaFromPinotSchema() throws Exception {
-    com.linkedin.pinot.common.data.Schema pinotSchema = new com.linkedin.pinot.common.data.Schema();
-
-    pinotSchema.setSchemaName("test");
-    FieldSpec spec = new DimensionFieldSpec("d1", DataType.STRING, true);
-    pinotSchema.addField(spec);
-    spec = new MetricFieldSpec("m1", DataType.DOUBLE);
-    pinotSchema.addField(spec);
-    spec = new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "t"));
-    pinotSchema.addField(spec);
-
-    Schema avroSchema = ThirdeyeAvroUtils.constructAvroSchemaFromPinotSchema(pinotSchema);
-    String dType = ThirdeyeAvroUtils.getDataTypeForField("d1", avroSchema);
-    Assert.assertEquals(dType, "STRING", "Avro schema constructed incorrectly");
-    dType = ThirdeyeAvroUtils.getDataTypeForField("m1", avroSchema);
-    Assert.assertEquals(dType, "DOUBLE", "Avro schema constructed incorrectly");
-    dType = ThirdeyeAvroUtils.getDataTypeForField("t", avroSchema);
-    Assert.assertEquals(dType, "LONG", "Avro schema constructed incorrectly");
-  }
-
-}
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyePinotSchemaUtilsTest.java b/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyePinotSchemaUtilsTest.java
deleted file mode 100644
index 392d8b0bca..0000000000
--- a/thirdeye/thirdeye-hadoop/src/test/java/com/linkedin/thirdeye/hadoop/util/ThirdeyePinotSchemaUtilsTest.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *         http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.linkedin.thirdeye.hadoop.util;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.Properties;
-
-import org.testng.Assert;
-import org.testng.annotations.BeforeTest;
-import org.testng.annotations.Test;
-
-import com.linkedin.pinot.common.data.Schema;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfig;
-import com.linkedin.thirdeye.hadoop.config.ThirdEyeConfigProperties;
-import com.linkedin.thirdeye.hadoop.util.ThirdeyePinotSchemaUtils;
-
-public class ThirdeyePinotSchemaUtilsTest {
-
-  ThirdEyeConfig thirdeyeConfig;
-  Properties props;
-
-  @BeforeTest
-  public void setup() {
-    props = new Properties();
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "INT,INT");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), "d2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d2", "m1,m2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d2", "20,30");
-
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), "d1,d2");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d1", "x,y");
-    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d2", "20");
-
-    thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
-  }
-
-  @Test
-  public void testThirdeyeConfigToPinotSchemaGeneration() throws Exception {
-    Schema schema = ThirdeyePinotSchemaUtils.createSchema(thirdeyeConfig);
-
-    Assert.assertEquals(schema.getAllFieldSpecs().size(), 8, "Incorrect pinot schema fields list size");
-    List<String> dimensions = Arrays.asList("d1", "d2", "d2_topk", "d3");
-    Assert.assertEquals(schema.getDimensionNames().containsAll(dimensions), true,
-        "New schema dimensions " + schema.getDimensionNames() + " is missing dimensions");
-
-    List<String> metrics = Arrays.asList("m1", "m2", "__COUNT");
-    Assert.assertEquals(schema.getMetricNames().containsAll(metrics), true,
-        "New schema metrics " + schema.getMetricNames() + "is missing metrics");
-
-    Assert.assertEquals(schema.getTimeColumnName(), "hoursSinceEpoch");
-
-  }
-
-}
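
The field count of 8 asserted above is consistent with the configuration: 4 dimension specs (d1, d2, the derived d2_topk, d3), 3 metric specs (m1, m2 and the additional __COUNT metric), and 1 time spec for hoursSinceEpoch, i.e. 4 + 3 + 1 = 8.
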
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseTest.java b/thirdeye/thirdeye-hadoop/src/test/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseTest.java
new file mode 100644
index 0000000000..b422f2f4aa
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/test/java/org/apache/pinot/thirdeye/hadoop/aggregation/AggregationPhaseTest.java
@@ -0,0 +1,265 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pinot.thirdeye.hadoop.aggregation;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.TreeMap;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.hadoop.io.AvroSerialization;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
+import org.apache.hadoop.mrunit.testutil.TemporaryPath;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Lists;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfig;
+import org.apache.pinot.thirdeye.hadoop.config.ThirdEyeConfigProperties;
+import org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseJob.AggregationMapper;
+import org.apache.pinot.thirdeye.hadoop.aggregation.AggregationPhaseJob.AggregationReducer;
+
+/**
+ * This tests the mapper of the Aggregation phase, checking conversion of the time column to bucket time.
+ * It also tests the reducer, checking aggregation using the new time values.
+ */
+public class AggregationPhaseTest {
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  private static final String HADOOP_IO_SERIALIZATION = "io.serializations";
+  private static final String AVRO_SCHEMA = "schema.avsc";
+
+  private String outputPath;
+  private Schema inputSchema;
+  private ThirdEyeConfig thirdeyeConfig;
+  private AggregationPhaseConfig aggPhaseConfig;
+  Properties props = new Properties();
+
+  private MapDriver<AvroKey<GenericRecord>, NullWritable, BytesWritable, BytesWritable> mapDriver;
+  private ReduceDriver<BytesWritable, BytesWritable, AvroKey<GenericRecord>, NullWritable> reduceDriver;
+
+  private void setUpAvroSerialization(Configuration conf, Schema inputSchema) {
+    String[] currentSerializations = conf.getStrings(HADOOP_IO_SERIALIZATION);
+    String[] finalSerializations = new String[currentSerializations.length + 1];
+    System.arraycopy(currentSerializations, 0, finalSerializations, 0,
+        currentSerializations.length);
+    finalSerializations[finalSerializations.length - 1] = AvroSerialization.class.getName();
+    mapDriver.getConfiguration().setStrings(HADOOP_IO_SERIALIZATION, finalSerializations);
+
+    AvroSerialization.addToConfiguration(conf);
+    AvroSerialization.setKeyWriterSchema(conf, inputSchema);
+    AvroSerialization.setValueWriterSchema(conf, Schema.create(Schema.Type.NULL));
+  }
+
+  private List<GenericRecord> generateTestMapperData() throws Exception {
+    List<GenericRecord> inputRecords = new ArrayList<GenericRecord>();
+
+    // 2016-04-27T190000
+    GenericRecord input = new GenericData.Record(inputSchema);
+    input.put("d1", "abc1");
+    input.put("d2", 501L);
+    input.put("d3", "xyz1");
+    input.put("hoursSinceEpoch", 1461808800000L);
+    input.put("m1", 100);
+    input.put("m2", 20);
+    inputRecords.add(input);
+
+    // 2016-04-27T191000
+    input = new GenericData.Record(inputSchema);
+    input.put("d1", "abc1");
+    input.put("d2", 501L);
+    input.put("d3", "xyz1");
+    input.put("hoursSinceEpoch", 1461809400000L);
+    input.put("m1", 100);
+    input.put("m2", 20);
+    inputRecords.add(input);
+
+    // 2016-04-27T200000
+    input = new GenericData.Record(inputSchema);
+    input.put("d1", "abc2");
+    input.put("d2", 502L);
+    input.put("d3", "xyz2");
+    input.put("hoursSinceEpoch", 1461812400000L);
+    input.put("m1", 10);
+    input.put("m2", 2);
+    inputRecords.add(input);
+
+    return inputRecords;
+  }
+
+
+  private List<Pair<BytesWritable,List<BytesWritable>>> generateTestReduceData(List<Pair<BytesWritable, BytesWritable>> result) throws Exception {
+    List<Pair<BytesWritable, List<BytesWritable>>> inputRecords = new ArrayList<>();
+    Map<BytesWritable, List<BytesWritable>> inputMap = new TreeMap<>();
+
+    for (Pair<BytesWritable, BytesWritable> pair : result) {
+      inputMap.put(pair.getFirst(), new ArrayList<BytesWritable>());
+    }
+
+    for (Pair<BytesWritable, BytesWritable> pair : result) {
+      inputMap.get(pair.getFirst()).add(pair.getSecond());
+    }
+    for (Entry<BytesWritable, List<BytesWritable>> listPair : inputMap.entrySet()) {
+      inputRecords.add(new Pair<BytesWritable, List<BytesWritable>>(listPair.getKey(), listPair.getValue()));
+    }
+    return inputRecords;
+  }
+
+  @Before
+  public void setUp() throws Exception {
+
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "INT,INT");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "hoursSinceEpoch");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_SIZE.toString(), "1");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_TYPE.toString(), TimeUnit.HOURS.toString());
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_SIZE.toString(), "1");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_INPUT_TIMECOLUMN_TYPE.toString(), TimeUnit.MILLISECONDS.toString());
+    thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
+    aggPhaseConfig = AggregationPhaseConfig.fromThirdEyeConfig(thirdeyeConfig);
+
+    // Mapper config
+    AggregationMapper mapper = new AggregationMapper();
+    mapDriver = MapDriver.newMapDriver(mapper);
+    Configuration configuration = mapDriver.getConfiguration();
+    configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
+        + "org.apache.hadoop.io.serializer.WritableSerialization");
+
+    configuration.set(AggregationPhaseConstants.AGG_PHASE_THIRDEYE_CONFIG.toString(),
+        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
+
+    inputSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
+    setUpAvroSerialization(mapDriver.getConfiguration(), inputSchema);
+
+    // Reducer config
+    AggregationReducer reducer = new AggregationReducer();
+    reduceDriver = ReduceDriver.newReduceDriver(reducer);
+    configuration = reduceDriver.getConfiguration();
+    configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
+        + "org.apache.hadoop.io.serializer.WritableSerialization");
+
+    Schema reducerSchema = new Schema.Parser().parse(ClassLoader.getSystemResourceAsStream(AVRO_SCHEMA));
+    configuration.set(AggregationPhaseConstants.AGG_PHASE_AVRO_SCHEMA.toString(), reducerSchema.toString());
+
+    configuration.set(AggregationPhaseConstants.AGG_PHASE_THIRDEYE_CONFIG.toString(),
+        OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));
+
+    TemporaryPath tmpPath = new TemporaryPath();
+    outputPath = tmpPath.toString();
+    configuration.set(AggregationPhaseConstants.AGG_PHASE_OUTPUT_PATH.toString(), outputPath);
+    setUpAvroSerialization(reduceDriver.getConfiguration(), reducerSchema);
+
+  }
+
+  @Test
+  public void testAggregationPhase() throws Exception {
+
+    int recordCount = 0;
+    List<GenericRecord> inputRecords = generateTestMapperData();
+    for (GenericRecord record : inputRecords) {
+      AvroKey<GenericRecord> inKey = new AvroKey<GenericRecord>();
+      inKey.datum(record);
+      mapDriver.addInput(new Pair<AvroKey<GenericRecord>, NullWritable>(inKey, NullWritable.get()));
+      recordCount++;
+    }
+
+    List<Pair<BytesWritable, BytesWritable>> mapResult = mapDriver.run();
+    Assert.assertEquals("Incorrect number of records emitted by mapper", recordCount, mapResult.size());
+
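+    // The first two map outputs land in time bucket 406058 and the third in 406059.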
+    AggregationPhaseMapOutputKey keyWrapper =
+        AggregationPhaseMapOutputKey.fromBytes(mapResult.get(0).getFirst().getBytes(), aggPhaseConfig.getDimensionTypes());
+    Assert.assertEquals(406058, keyWrapper.getTime());
+    keyWrapper = AggregationPhaseMapOutputKey.fromBytes(mapResult.get(1).getFirst().getBytes(), aggPhaseConfig.getDimensionTypes());
+    Assert.assertEquals(406058, keyWrapper.getTime());
+    keyWrapper = AggregationPhaseMapOutputKey.fromBytes(mapResult.get(2).getFirst().getBytes(), aggPhaseConfig.getDimensionTypes());
+    Assert.assertEquals(406059, keyWrapper.getTime());
+
+    List<Pair<BytesWritable, List<BytesWritable>>> reduceInput = generateTestReduceData(mapResult);
+    reduceDriver.addAll(reduceInput);
+
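+    // The three map outputs should collapse into two aggregated records, one per distinct key group.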
+    List<Pair<AvroKey<GenericRecord>, NullWritable>> reduceResult = reduceDriver.run();
+    Assert.assertEquals("Incorrect number of records returned by aggregation reducer", 2, reduceResult.size());
+
+    GenericRecord record = reduceResult.get(0).getFirst().datum();
+    List<Object> dimensionsExpected = Lists.newArrayList();
+    dimensionsExpected.add("abc1");
+    dimensionsExpected.add(501L);
+    dimensionsExpected.add("xyz1");
+    List<Object> dimensionsActual = getDimensionsFromRecord(record);
+    Assert.assertEquals(dimensionsExpected, dimensionsActual);
+    List<Integer> metricsExpected = Lists.newArrayList(200, 40);
+    List<Integer> metricsActual = getMetricsFromRecord(record);
+    Assert.assertEquals(metricsExpected, metricsActual);
+    Assert.assertEquals(406058, (long) record.get("hoursSinceEpoch"));
+
+    record = reduceResult.get(1).getFirst().datum();
+    dimensionsExpected = Lists.newArrayList();
+    dimensionsExpected.add("abc2");
+    dimensionsExpected.add(502L);
+    dimensionsExpected.add("xyz2");
+    dimensionsActual = getDimensionsFromRecord(record);
+    Assert.assertEquals(dimensionsExpected, dimensionsActual);
+    metricsExpected = Lists.newArrayList(10, 2);
+    metricsActual = getMetricsFromRecord(record);
+    Assert.assertEquals(metricsExpected, metricsActual);
+    Assert.assertEquals(406059, (long) record.get("hoursSinceEpoch"));
+  }
+
+  private List<Object> getDimensionsFromRecord(GenericRecord record) {
+    List<Object> dimensionsActual = new ArrayList<>();
+    dimensionsActual.add(record.get("d1"));
+    dimensionsActual.add(record.get("d2"));
+    dimensionsActual.add(record.get("d3"));
+    return dimensionsActual;
+  }
+
+  private List<Integer> getMetricsFromRecord(GenericRecord record) {
+    List<Integer> metricsActual = new ArrayList<>();
+    metricsActual.add((int) record.get("m1"));
+    metricsActual.add((int) record.get("m2"));
+    return metricsActual;
+  }
+
+  @After
+  public void cleanUp() throws IOException {
+
+    File f = new File(outputPath);
+    FileUtils.deleteDirectory(f);
+  }
+}
diff --git a/thirdeye/thirdeye-hadoop/src/test/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfigTest.java b/thirdeye/thirdeye-hadoop/src/test/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfigTest.java
new file mode 100644
index 0000000000..97a95efca8
--- /dev/null
+++ b/thirdeye/thirdeye-hadoop/src/test/java/org/apache/pinot/thirdeye/hadoop/config/ThirdEyeConfigTest.java
@@ -0,0 +1,249 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pinot.thirdeye.hadoop.config;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.concurrent.TimeUnit;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class ThirdEyeConfigTest {
+
+  private Properties props;
+  private ThirdEyeConfig thirdeyeConfig;
+  private ThirdEyeConfig config;
+
+  @BeforeClass
+  public void setup() {
+    props = new Properties();
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_TYPES.toString(), "STRING,LONG,STRING");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2,m3");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "LONG,FLOAT,INT");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString(), "t1");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_TYPE.toString(), "DAYS");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_SIZE.toString(), "10");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_SPLIT_THRESHOLD.toString(), "1000");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_SPLIT_ORDER.toString(), "d1,d2,d3");
+
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_THRESHOLD_METRIC_NAMES.toString(), "m1,m3");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRIC_THRESHOLD_VALUES.toString(), "0.02,0.1");
+
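+    // Per-dimension top-k specs: the metric names and k values are parallel comma-separated lists.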
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_DIMENSION_NAMES.toString(), "d2,d3");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d2", "m1,m2");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d2", "20,30");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_METRICS.toString() + ".d3", "m1");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TOPK_KVALUES.toString() + ".d3", "50");
+
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION_NAMES.toString(), "d1,d2");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d1", "x,y");
+    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_WHITELIST_DIMENSION.toString() + ".d2", "500");
+
+    thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
+  }
+
+  @Test(expectedExceptions = IllegalArgumentException.class)
+  public void testTableNameConfig() throws IllegalArgumentException {
+    Assert.assertEquals("collection", thirdeyeConfig.getCollection(), "Collection name not correctly set");
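+    // Removing the mandatory table name should make fromProperties() throw; the finally block restores it for later tests.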
+    try {
+      props.remove(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString());
+      config = ThirdEyeConfig.fromProperties(props);
+    } finally {
+      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_TABLE_NAME.toString(), "collection");
+    }
+  }
+
+  @Test(expectedExceptions = IllegalArgumentException.class)
+  public void testDimensionsConfig() throws IllegalArgumentException {
+    Assert.assertEquals(3, thirdeyeConfig.getDimensionNames().size(), "Incorrect number of dimensions");
+    Assert.assertEquals(new String[]{"d1", "d2", "d3"}, thirdeyeConfig.getDimensionNames().toArray(), "Incorrect dimensions");
+
+    try {
+      props.remove(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString());
+      config = ThirdEyeConfig.fromProperties(props);
+    } finally {
+      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_DIMENSION_NAMES.toString(), "d1,d2,d3");
+    }
+  }
+
+  @Test
+  public void testMetricsConfig() throws IllegalArgumentException {
+    boolean failed = false;
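+    // Each sub-case below removes or mismatches one metric property, expects fromProperties() to fail, then restores it.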
+    Assert.assertEquals(3, thirdeyeConfig.getMetricNames().size(), "Incorrect number of metrics");
+    Assert.assertEquals(3, thirdeyeConfig.getMetrics().size(), "Incorrect number of metric specs");
+    Assert.assertEquals(new String[]{"m1", "m2", "m3"}, thirdeyeConfig.getMetricNames().toArray(), "Incorrect metrics");
+    MetricType[] actualMetricTypes = new MetricType[3];
+    for (int i = 0; i < 3; i++) {
+      actualMetricTypes[i] = thirdeyeConfig.getMetrics().get(i).getType();
+    }
+    Assert.assertEquals(actualMetricTypes, new MetricType[]{MetricType.LONG, MetricType.FLOAT, MetricType.INT}, "Incorrect metric specs");
+
+    try {
+      props.remove(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString());
+      config = ThirdEyeConfig.fromProperties(props);
+    } catch (IllegalArgumentException e) {
+      failed = true;
+      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2,m3");
+    }
+    Assert.assertTrue(failed, "Expected exception due to missing metric names property");
+
+    failed = false;
+    try {
+      props.remove(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString());
+      config = ThirdEyeConfig.fromProperties(props);
+    } catch (IllegalArgumentException e) {
+      failed = true;
+      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), "LONG,FLOAT,INT");
+    }
+    Assert.assertTrue(failed, "Expected exception due to missing metric types property");
+
+    failed = false;
+    try {
+      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2");
+      config = ThirdEyeConfig.fromProperties(props);
+    } catch (IllegalStateException e) {
+      failed = true;
+      props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString(), "m1,m2,m3");
+    }
+    Assert.assertTrue(failed, "Expected exception due to unequal number of metric names and types in properties");
+  }
+
+  @Test
+  public void testTimeConfig() throws IllegalArgumentException {
+    boolean failed = false;
+    Assert.assertEquals(thirdeyeConfig.getTime().getColumnName(), "t1", "Incorrect time column name");
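+    // setup() sets no THIRDEYE_INPUT_TIMECOLUMN_* properties, so the derived input time spec should be null.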
+    Assert.assertNull(thirdeyeConfig.getInputTime(), "Incorrect input time column name");
+    Assert.assertEquals(thirdeyeConfig.getTime().getTimeGranularity().getSize(), 10, "Incorrect time size");
+    Assert.assertEquals(thirdeyeConfig.getTime().getTimeGranularity().getUnit(), TimeUnit.DAYS, "Incorrect time unit");
+
+    try {
+      props.remove(ThirdEyeConfigProperties.THIRDEYE_TIMECOLUMN_NAME.toString());
+      config = ThirdEyeConfig.fromProperties(props);
+    } catch (IllegalArgumentException e) {
+      failed = true;

  (This diff was longer than 20,000 lines, and has been truncated...)


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pinot.apache.org
For additional commands, e-mail: dev-help@pinot.apache.org