You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cc...@apache.org on 2020/04/03 00:32:48 UTC

[druid] branch master updated: DruidInputSource can add new dimensions during re-ingestion (#9590)

This is an automated email from the ASF dual-hosted git repository.

ccaominh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new af3337d  DruidInputSource can add new dimensions during re-ingestion (#9590)
af3337d is described below

commit af3337dac8091d7499a86014de125b26ed90fbdc
Author: Suneet Saldanha <44...@users.noreply.github.com>
AuthorDate: Thu Apr 2 17:32:31 2020 -0700

    DruidInputSource can add new dimensions during re-ingestion (#9590)
    
    * WIP integration tests
    
    * Add integration test for ingestion with transformSpec
    
    * WIP almost working tests
    
    * Add ignored tests
    
    * checkstyle stuff
    
    * remove newPage from index task ingestion spec
    
    * more test cleanup
    
    * still not quite working
    
    * Actually disable the tests
    
    * working tests
    
    * fix codestyle
    
    * don't use junit in integration tests
    
    * actually fix the bug
    
    * fix checkstyle
    
    * bring index tests closer to reindex tests
---
 examples/quickstart/tutorial/transform-index.json  |   2 +-
 .../indexing/common/ReingestionTimelineUtils.java  |  32 ++++++
 .../firehose/IngestSegmentFirehoseFactory.java     |  17 +--
 .../druid/indexing/input/DruidInputSource.java     |  16 +--
 .../druid/tests/indexer/ITTransformTest.java       | 115 +++++++++++++++++++++
 .../wikipedia_index_queries_with_transform.json    |  62 +++++++++++
 ...edia_index_task_with_inputsource_transform.json | 103 ++++++++++++++++++
 .../wikipedia_index_task_with_transform.json       | 103 ++++++++++++++++++
 ...ex_druid_input_source_task_with_transforms.json | 106 +++++++++++++++++++
 .../wikipedia_reindex_queries_with_transforms.json |  80 ++++++++++++++
 .../wikipedia_reindex_task_with_transforms.json    | 108 +++++++++++++++++++
 11 files changed, 720 insertions(+), 24 deletions(-)

diff --git a/examples/quickstart/tutorial/transform-index.json b/examples/quickstart/tutorial/transform-index.json
index bf605fc..caebb9f 100644
--- a/examples/quickstart/tutorial/transform-index.json
+++ b/examples/quickstart/tutorial/transform-index.json
@@ -55,7 +55,7 @@
         "baseDir" : "quickstart/tutorial",
         "filter" : "transform-data.json"
       },
-      "inpuFormat" : {
+      "inputFormat" : {
         "type" : "json"
       },
       "appendToExisting" : false
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
index 1f4820f..bd9d214 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/ReingestionTimelineUtils.java
@@ -22,11 +22,13 @@ package org.apache.druid.indexing.common;
 import com.google.common.collect.BiMap;
 import com.google.common.collect.HashBiMap;
 import com.google.common.collect.Lists;
+import org.apache.druid.data.input.impl.DimensionsSpec;
 import org.apache.druid.timeline.DataSegment;
 import org.apache.druid.timeline.TimelineObjectHolder;
 import org.apache.druid.timeline.partition.PartitionChunk;
 
 import javax.annotation.Nullable;
+import javax.validation.constraints.NotNull;
 import java.util.List;
 import java.util.Set;
 import java.util.stream.Collectors;
@@ -101,4 +103,34 @@ public class ReingestionTimelineUtils
                     .mapToObj(orderedMetrics::get)
                     .collect(Collectors.toList());
   }
+
+  /**
+   * Determines which dimensions should be ingested during re-ingestion. In order of preference:
+   * - The explicit dimensions, if they are provided.
+   * - The custom dimensions from the dimensionsSpec, if it declares any.
+   * - The unique dimensions computed from the timeline, minus the dimensionsSpec's dimension exclusions.
+   *
+   * @param explicitDimensions sent as part of the re-ingestion InputSource.
+   * @param dimensionsSpec from the provided ingestion spec.
+   * @param timeLineSegments for the datasource that is being read.
+   * @return the dimension names to ingest, chosen according to the preference order above.
+   */
+  public static List<String> getDimensionsToReingest(
+      @Nullable List<String> explicitDimensions,
+      @NotNull DimensionsSpec dimensionsSpec,
+      @NotNull List<TimelineObjectHolder<String, DataSegment>> timeLineSegments)
+  {
+    final List<String> dims;
+    if (explicitDimensions != null) {
+      dims = explicitDimensions;
+    } else if (dimensionsSpec.hasCustomDimensions()) {
+      dims = dimensionsSpec.getDimensionNames();
+    } else {
+      dims = ReingestionTimelineUtils.getUniqueDimensions(
+          timeLineSegments,
+          dimensionsSpec.getDimensionExclusions()
+      );
+    }
+    return dims;
+  }
 }
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
index 41c1c8f..6248828 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/firehose/IngestSegmentFirehoseFactory.java
@@ -214,18 +214,11 @@ public class IngestSegmentFirehoseFactory implements FiniteFirehoseFactory<Input
       }
     }
 
-    final List<String> dims;
-    if (dimensions != null) {
-      dims = dimensions;
-    } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
-      dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
-    } else {
-      dims = ReingestionTimelineUtils.getUniqueDimensions(
-        timeLineSegments,
-        inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()
-      );
-    }
-
+    final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(
+        dimensions,
+        inputRowParser.getParseSpec().getDimensionsSpec(),
+        timeLineSegments
+    );
     final List<String> metricsList = metrics == null
                                      ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments)
                                      : metrics;
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
index 8fc2142..b9cc575 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/input/DruidInputSource.java
@@ -181,17 +181,11 @@ public class DruidInputSource extends AbstractInputSource implements SplittableI
               .from(partitionHolder)
               .transform(chunk -> new DruidSegmentInputEntity(segmentLoader, chunk.getObject(), holder.getInterval()));
         }).iterator();
-    final List<String> effectiveDimensions;
-    if (dimensions == null) {
-      effectiveDimensions = ReingestionTimelineUtils.getUniqueDimensions(
-          timeline,
-          inputRowSchema.getDimensionsSpec().getDimensionExclusions()
-      );
-    } else if (inputRowSchema.getDimensionsSpec().hasCustomDimensions()) {
-      effectiveDimensions = inputRowSchema.getDimensionsSpec().getDimensionNames();
-    } else {
-      effectiveDimensions = dimensions;
-    }
+    final List<String> effectiveDimensions = ReingestionTimelineUtils.getDimensionsToReingest(
+        dimensions,
+        inputRowSchema.getDimensionsSpec(),
+        timeline
+    );
 
     List<String> effectiveMetrics;
     if (metrics == null) {
diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java
new file mode 100644
index 0000000..4a51164
--- /dev/null
+++ b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITTransformTest.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.tests.indexer;
+
+import org.apache.druid.testing.guice.DruidTestModuleFactory;
+import org.apache.druid.tests.TestNGGroup;
+import org.testng.annotations.Guice;
+import org.testng.annotations.Test;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+@Test(groups = {TestNGGroup.BATCH_INDEX, TestNGGroup.QUICKSTART_COMPATIBLE})
+@Guice(moduleFactory = DruidTestModuleFactory.class)
+public class ITTransformTest extends AbstractITBatchIndexTest
+{
+  private static final String INDEX_TASK_WITH_FIREHOSE = "/indexer/wikipedia_index_task_with_transform.json";
+  private static final String INDEX_TASK_WITH_INPUT_SOURCE = "/indexer/wikipedia_index_task_with_inputsource_transform.json";
+  private static final String INDEX_QUERIES_RESOURCE = "/indexer/wikipedia_index_queries_with_transform.json";
+  private static final String INDEX_DATASOURCE = "wikipedia_index_test";
+
+  private static final String REINDEX_TASK = "/indexer/wikipedia_reindex_task_with_transforms.json";
+  private static final String REINDEX_TASK_WITH_DRUID_INPUT_SOURCE = "/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json";
+  private static final String REINDEX_QUERIES_RESOURCE = "/indexer/wikipedia_reindex_queries_with_transforms.json";
+  private static final String REINDEX_DATASOURCE = "wikipedia_reindex_test";
+
+  @Test
+  public void testIndexAndReIndexWithTransformSpec() throws IOException
+  {
+    final String reindexDatasourceWithDruidInputSource = REINDEX_DATASOURCE + "-druidInputSource";
+
+    try (
+        final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+        final Closeable ignored2 = unloader(reindexDatasourceWithDruidInputSource + config.getExtraDatasourceNameSuffix())
+    ) {
+      doIndexTest(
+          INDEX_DATASOURCE,
+          INDEX_TASK_WITH_INPUT_SOURCE,
+          INDEX_QUERIES_RESOURCE,
+          false,
+          true,
+          true
+      );
+      doReindexTest(
+          INDEX_DATASOURCE,
+          reindexDatasourceWithDruidInputSource,
+          REINDEX_TASK_WITH_DRUID_INPUT_SOURCE,
+          REINDEX_QUERIES_RESOURCE
+      );
+    }
+  }
+
+  @Test(enabled = false)
+  public void testIndexAndReIndexUsingIngestSegmentWithTransforms() throws IOException
+  {
+    // TODO: reinstate this test when https://github.com/apache/druid/issues/9591 is fixed, and
+    // move the re-index step into testIndexAndReIndexWithTransformSpec for faster tests.
+    final String reindexDatasource = REINDEX_DATASOURCE + "-testIndexData";
+    try (
+        final Closeable ignored1 = unloader(INDEX_DATASOURCE + config.getExtraDatasourceNameSuffix());
+        final Closeable ignored2 = unloader(reindexDatasource + config.getExtraDatasourceNameSuffix())
+    ) {
+      doIndexTest(
+          INDEX_DATASOURCE,
+          INDEX_TASK_WITH_INPUT_SOURCE,
+          INDEX_QUERIES_RESOURCE,
+          false,
+          true,
+          true
+      );
+      doReindexTest(
+          INDEX_DATASOURCE,
+          reindexDatasource,
+          REINDEX_TASK,
+          REINDEX_QUERIES_RESOURCE
+      );
+    }
+  }
+
+  @Test(enabled = false)
+  public void testIndexWithFirehoseAndTransforms() throws IOException
+  {
+    // TODO: reinstate this test when https://github.com/apache/druid/issues/9589 is fixed
+    final String indexDatasource = INDEX_DATASOURCE + "-firehose";
+    try (
+        final Closeable ignored1 = unloader(indexDatasource + config.getExtraDatasourceNameSuffix());
+    ) {
+      doIndexTest(
+          indexDatasource,
+          INDEX_TASK_WITH_FIREHOSE,
+          INDEX_QUERIES_RESOURCE,
+          false,
+          true,
+          true
+      );
+    }
+  }
+}
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json
new file mode 100644
index 0000000..f0cfba6
--- /dev/null
+++ b/integration-tests/src/test/resources/indexer/wikipedia_index_queries_with_transform.json
@@ -0,0 +1,62 @@
+[
+    {
+        "description":"having spec on post aggregation",
+        "query":{
+            "queryType":"groupBy",
+            "dataSource":"%%DATASOURCE%%",
+            "granularity":"day",
+            "dimensions":[
+                "page",
+                "city"
+            ],
+            "filter":{
+                "type":"selector",
+                "dimension":"language",
+                "value":"language-zh"
+            },
+            "aggregations":[
+                {
+                    "type":"count",
+                    "name":"rows"
+                },
+                {
+                    "type":"longSum",
+                    "fieldName":"triple-added",
+                    "name":"added_count"
+                },
+                {
+                    "type":"longSum",
+                    "fieldName":"delta",
+                    "name":"delta_sum"
+                }
+            ],
+            "postAggregations": [
+                {
+                    "type":"arithmetic",
+                    "name":"added_count_times_ten",
+                    "fn":"*",
+                    "fields":[
+                        {"type":"fieldAccess", "name":"added_count", "fieldName":"added_count"},
+                        {"type":"constant", "name":"const", "value":10}
+                    ]
+                }
+            ],
+            "having":{"type":"greaterThan", "aggregation":"added_count_times_ten", "value":9000},
+            "intervals":[
+                "2013-08-31T00:00/2013-09-01T00:00"
+            ]
+        },
+        "expectedResults":[ {
+            "version" : "v1",
+            "timestamp" : "2013-08-31T00:00:00.000Z",
+            "event" : {
+                "added_count_times_ten" : 27150.0,
+                "page" : "Crimson Typhoon",
+                "city" : "Taiyuan",
+                "added_count" : 2715,
+                "delta_sum" : 900,
+                "rows" : 1
+            }
+        } ]
+    }
+]
\ No newline at end of file
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json
new file mode 100644
index 0000000..bef8f01
--- /dev/null
+++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_inputsource_transform.json
@@ -0,0 +1,103 @@
+{
+    "type" : "index",
+    "spec" : {
+        "dataSchema" : {
+            "dataSource" : "%%DATASOURCE%%",
+            "timestampSpec": {
+                "column": "timestamp"
+            },
+            "metricsSpec": [
+                {
+                    "type": "count",
+                    "name": "count"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "added",
+                    "fieldName": "added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "triple-added",
+                    "fieldName": "triple-added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "deleted",
+                    "fieldName": "deleted"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "delta",
+                    "fieldName": "delta"
+                },
+                {
+                    "name": "thetaSketch",
+                    "type": "thetaSketch",
+                    "fieldName": "user"
+                },
+                {
+                    "name": "quantilesDoublesSketch",
+                    "type": "quantilesDoublesSketch",
+                    "fieldName": "delta"
+                },
+                {
+                    "name": "HLLSketchBuild",
+                    "type": "HLLSketchBuild",
+                    "fieldName": "user"
+                }
+            ],
+            "granularitySpec": {
+                "segmentGranularity": "DAY",
+                "queryGranularity": "second",
+                "intervals" : [ "2013-08-31/2013-09-02" ]
+            },
+            "dimensionsSpec": {
+                "dimensions": [
+                    "page",
+                    {"type": "string", "name": "language", "createBitmapIndex": false},
+                    "user",
+                    "unpatrolled",
+                    "robot",
+                    "anonymous",
+                    "namespace",
+                    "continent",
+                    "country",
+                    "region",
+                    "city"
+                ]
+            },
+            "transformSpec": {
+                "transforms": [
+                    {
+                        "type": "expression",
+                        "name": "language",
+                        "expression": "concat('language-', language)"
+                    },
+                    {
+                        "type": "expression",
+                        "name": "triple-added",
+                        "expression": "added * 3"
+                    }
+                ]
+            }
+        },
+        "ioConfig" : {
+            "type" : "index",
+            "inputSource" : {
+                "type" : "local",
+                "baseDir" : "/resources/data/batch_index",
+                "filter" : "wikipedia_index_data*"
+            },
+            "inputFormat" : {
+                "type" : "json"
+            },
+            "appendToExisting" : false
+        },
+        "tuningConfig" : {
+            "type" : "index",
+            "maxRowsPerSegment" : 5000000,
+            "maxRowsInMemory" : 25000
+        }
+    }
+}
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json
new file mode 100644
index 0000000..5f289dc
--- /dev/null
+++ b/integration-tests/src/test/resources/indexer/wikipedia_index_task_with_transform.json
@@ -0,0 +1,103 @@
+{
+    "type": "index",
+    "spec": {
+        "dataSchema": {
+            "dataSource": "%%DATASOURCE%%",
+            "metricsSpec": [
+                {
+                    "type": "count",
+                    "name": "count"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "added",
+                    "fieldName": "added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "triple-added",
+                    "fieldName": "triple-added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "deleted",
+                    "fieldName": "deleted"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "delta",
+                    "fieldName": "delta"
+                },
+                {
+                    "name": "thetaSketch",
+                    "type": "thetaSketch",
+                    "fieldName": "user"
+                },
+                {
+                    "name": "quantilesDoublesSketch",
+                    "type": "quantilesDoublesSketch",
+                    "fieldName": "delta"
+                },
+                {
+                    "name": "HLLSketchBuild",
+                    "type": "HLLSketchBuild",
+                    "fieldName": "user"
+                }
+            ],
+            "granularitySpec": {
+                "segmentGranularity": "DAY",
+                "queryGranularity": "second",
+                "intervals" : [ "2013-08-31/2013-09-02" ]
+            },
+            "parser": {
+                "parseSpec": {
+                    "format" : "json",
+                    "timestampSpec": {
+                        "column": "timestamp"
+                    },
+                    "dimensionsSpec": {
+                        "dimensions": [
+                            "page",
+                            {"type": "string", "name": "language", "createBitmapIndex": false},
+                            "user",
+                            "unpatrolled",
+                            "robot",
+                            "anonymous",
+                            "namespace",
+                            "continent",
+                            "country",
+                            "region",
+                            "city"
+                        ]
+                    }
+                }
+            },
+            "transformSpec": {
+                "transforms": [
+                    {
+                        "type": "expression",
+                        "name": "language",
+                        "expression": "concat('language-', language)"
+                    },
+                    {
+                        "type": "expression",
+                        "name": "triple-added",
+                        "expression": "added * 3"
+                    }
+                ]
+            }
+        },
+        "ioConfig": {
+            "type": "index",
+            "firehose": {
+                "type": "local",
+                "baseDir": "/resources/data/batch_index",
+                "filter": "wikipedia_index_data*"
+            }
+        },
+        "tuningConfig": {
+            "type": "index",
+            "maxRowsPerSegment": 3
+        }
+    }
+}
\ No newline at end of file
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json
new file mode 100644
index 0000000..3e8a44c
--- /dev/null
+++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_druid_input_source_task_with_transforms.json
@@ -0,0 +1,106 @@
+{
+    "type": "index",
+    "spec": {
+        "ioConfig": {
+            "type": "index",
+            "inputSource": {
+                "type": "druid",
+                "dataSource": "%%DATASOURCE%%",
+                "interval": "2013-08-31/2013-09-01"
+            }
+        },
+        "tuningConfig": {
+            "type": "index",
+            "partitionsSpec": {
+                "type": "dynamic"
+            }
+        },
+        "dataSchema": {
+            "dataSource": "%%REINDEX_DATASOURCE%%",
+            "granularitySpec": {
+                "type": "uniform",
+                "queryGranularity": "SECOND",
+                "segmentGranularity": "DAY"
+            },
+            "timestampSpec": {
+                "column": "__time",
+                "format": "iso"
+            },
+            "dimensionsSpec": {
+                "dimensions": [
+                    {"type": "string", "name": "language", "createBitmapIndex": false},
+                    "user",
+                    "unpatrolled",
+                    "page",
+                    "newPage",
+                    "anonymous",
+                    "namespace",
+                    "country",
+                    "region",
+                    "city"
+                ]
+            },
+            "transformSpec": {
+                "transforms": [
+                    {
+                        "type": "expression",
+                        "name": "newPage",
+                        "expression": "page"
+                    },
+                    {
+                        "type": "expression",
+                        "name": "city",
+                        "expression": "concat('city-', city)"
+                    },
+                    {
+                        "type": "expression",
+                        "name": "one-plus-triple-added",
+                        "expression": "\"triple-added\" + 1"
+                    },
+                    {
+                        "type": "expression",
+                        "name": "delta",
+                        "expression": "\"delta\" / 2"
+                    },
+                    {
+                        "type": "expression",
+                        "name": "double-deleted",
+                        "expression": "deleted * 2"
+                    }
+                ]
+            },
+            "metricsSpec": [
+                {
+                    "type": "doubleSum",
+                    "name": "added",
+                    "fieldName": "added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "triple-added",
+                    "fieldName": "triple-added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "one-plus-triple-added",
+                    "fieldName": "one-plus-triple-added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "deleted",
+                    "fieldName": "deleted"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "double-deleted",
+                    "fieldName": "double-deleted"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "delta",
+                    "fieldName": "delta"
+                }
+            ]
+        }
+    }
+}
\ No newline at end of file
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json
new file mode 100644
index 0000000..4050312
--- /dev/null
+++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_queries_with_transforms.json
@@ -0,0 +1,80 @@
+[
+    {
+        "description": "timeBoundary",
+        "query":{
+            "queryType" : "timeBoundary",
+            "dataSource": "%%DATASOURCE%%"
+        },
+        "expectedResults":[
+            {
+                "timestamp" : "2013-08-31T01:02:33.000Z",
+                "result" : {
+                    "minTime" : "2013-08-31T01:02:33.000Z",
+                    "maxTime" : "2013-08-31T12:41:27.000Z"
+                }
+            }
+        ]
+    },
+
+    {
+        "description":"having spec on post aggregation",
+        "query":{
+            "queryType":"groupBy",
+            "dataSource":"%%DATASOURCE%%",
+            "granularity":"day",
+            "dimensions":[
+                "newPage",
+                "city"
+            ],
+            "filter":{
+                "type":"selector",
+                "dimension":"language",
+                "value":"language-zh"
+            },
+            "aggregations":[
+                {
+                    "type":"longSum",
+                    "fieldName":"one-plus-triple-added",
+                    "name":"added_count"
+                },
+                {
+                    "type":"longSum",
+                    "fieldName":"double-deleted",
+                    "name":"double_deleted_count"
+                },
+                {
+                    "type":"longSum",
+                    "fieldName":"delta",
+                    "name":"delta_overshadowed"
+                }
+            ],
+            "postAggregations": [
+                {
+                    "type":"arithmetic",
+                    "name":"added_count_times_ten",
+                    "fn":"*",
+                    "fields":[
+                        {"type":"fieldAccess", "name":"added_count", "fieldName":"added_count"},
+                        {"type":"constant", "name":"const", "value":10}
+                    ]
+                }
+            ],
+            "having":{"type":"greaterThan", "aggregation":"added_count_times_ten", "value":9000},
+            "intervals":[
+                "2013-08-31T00:00/2013-09-01T00:00"
+            ]
+        },
+        "expectedResults":[ {
+            "version" : "v1",
+            "timestamp" : "2013-08-31T00:00:00.000Z",
+            "event" : {
+                "added_count_times_ten" : 27160.0,
+                "newPage" : "Crimson Typhoon",
+                "city" : "city-Taiyuan",
+                "double_deleted_count" : 10,
+                "delta_overshadowed" : 450,
+                "added_count" : 2716
+            }
+        } ]
+    }
+]
diff --git a/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json
new file mode 100644
index 0000000..029b136
--- /dev/null
+++ b/integration-tests/src/test/resources/indexer/wikipedia_reindex_task_with_transforms.json
@@ -0,0 +1,108 @@
+{
+    "type": "index",
+    "spec": {
+        "dataSchema": {
+            "dataSource": "%%REINDEX_DATASOURCE%%",
+            "metricsSpec": [
+                {
+                    "type": "doubleSum",
+                    "name": "added",
+                    "fieldName": "added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "triple-added",
+                    "fieldName": "triple-added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "one-plus-triple-added",
+                    "fieldName": "one-plus-triple-added"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "deleted",
+                    "fieldName": "deleted"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "double-deleted",
+                    "fieldName": "double-deleted"
+                },
+                {
+                    "type": "doubleSum",
+                    "name": "delta",
+                    "fieldName": "delta"
+                }
+            ],
+            "granularitySpec": {
+                "segmentGranularity": "DAY",
+                "queryGranularity": "second",
+                "intervals" : [ "2013-08-31/2013-09-01" ]
+            },
+            "parser": {
+                "parseSpec": {
+                    "format" : "json",
+                    "timestampSpec": {
+                        "column": "timestamp",
+                        "format": "iso"
+                    },
+                    "dimensionsSpec": {
+                        "dimensions": [
+                            {"type": "string", "name": "language", "createBitmapIndex": false},
+                            "user",
+                            "unpatrolled",
+                            "page",
+                            "newPage",
+                            "anonymous",
+                            "namespace",
+                            "country",
+                            "region",
+                            "city"
+                        ]
+                    },
+                    "transformSpec": {
+                        "transforms": [
+                            {
+                                "type": "expression",
+                                "name": "newPage",
+                                "expression": "page"
+                            },
+                            {
+                                "type": "expression",
+                                "name": "city",
+                                "expression": "concat('city-', city)"
+                            },
+                            {
+                                "type": "expression",
+                                "name": "one-plus-triple-added",
+                                "expression": "\"triple-added\" + 1"
+                            },
+                            {
+                                "type": "expression",
+                                "name": "delta",
+                                "expression": "\"delta\" / 2"
+                            },
+                            {
+                                "type": "expression",
+                                "name": "double-deleted",
+                                "expression": "deleted * 2"
+                            }
+                        ]
+                    }
+                }
+            }
+        },
+        "ioConfig": {
+            "type": "index",
+            "firehose": {
+                "type": "ingestSegment",
+                "dataSource": "%%DATASOURCE%%",
+                "interval": "2013-08-31/2013-09-01"
+            }
+        },
+        "tuningConfig": {
+            "type": "index"
+        }
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org