You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by ka...@apache.org on 2023/04/23 15:58:17 UTC

[druid] branch master updated: preserve explicitly specified dimension schema in "logical" schema of sampler response (#14144)

This is an automated email from the ASF dual-hosted git repository.

karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 887f8db1b5 preserve explicitly specified dimension schema in "logical" schema of sampler response (#14144)
887f8db1b5 is described below

commit 887f8db1b52b5d9cdc978ec71a528d6993290647
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Sun Apr 23 08:58:05 2023 -0700

    preserve explicitly specified dimension schema in "logical" schema of sampler response (#14144)
---
 .../overlord/sampler/InputSourceSampler.java       |  17 ++-
 .../sampler/InputSourceSamplerDiscoveryTest.java   | 115 +++++++++++++++++++++
 2 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
index 32033fe17a..a84cb4e197 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
@@ -247,10 +247,19 @@ public class InputSourceSampler
           if (!SamplerInputRow.SAMPLER_ORDERING_COLUMN.equals(dimensionDesc.getName())) {
             final ColumnType columnType = dimensionDesc.getCapabilities().toColumnType();
             signatureBuilder.add(dimensionDesc.getName(), columnType);
-            // for now, use legacy types instead of standard type
-            logicalDimensionSchemas.add(
-                DimensionSchema.getDefaultSchemaForBuiltInType(dimensionDesc.getName(), dimensionDesc.getCapabilities())
-            );
+            // use explicitly specified dimension schema if it exists
+            if (dataSchema != null &&
+                dataSchema.getDimensionsSpec() != null &&
+                dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()) != null) {
+              logicalDimensionSchemas.add(dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()));
+            } else {
+              logicalDimensionSchemas.add(
+                  DimensionSchema.getDefaultSchemaForBuiltInType(
+                      dimensionDesc.getName(),
+                      dimensionDesc.getCapabilities()
+                  )
+              );
+            }
             physicalDimensionSchemas.add(
                 dimensionDesc.getIndexer().getFormat().getColumnSchema(dimensionDesc.getName())
             );
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerDiscoveryTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerDiscoveryTest.java
index c1a7ebdde1..ee9c23ab31 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerDiscoveryTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerDiscoveryTest.java
@@ -33,6 +33,7 @@ import org.apache.druid.data.input.impl.TimestampSpec;
 import org.apache.druid.jackson.DefaultObjectMapper;
 import org.apache.druid.math.expr.ExpressionProcessing;
 import org.apache.druid.segment.AutoTypeColumnSchema;
+import org.apache.druid.segment.NestedDataDimensionSchema;
 import org.apache.druid.segment.column.ColumnType;
 import org.apache.druid.segment.column.RowSignature;
 import org.apache.druid.segment.indexing.DataSchema;
@@ -183,4 +184,118 @@ public class InputSourceSamplerDiscoveryTest extends InitializedNullHandlingTest
       ExpressionProcessing.initializeForTests();
     }
   }
+
+  @Test // samples the inline rows with a DataSchema that declares no dimensions and checks the reported schemas
+  public void testTypesClassicDiscovery()
+  {
+    final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n')); // STR_JSON_ROWS joined with newlines
+    final DataSchema dataSchema = new DataSchema(
+        "test",
+        new TimestampSpec("t", null, null),
+        DimensionsSpec.builder().build(), // no dimensions are set on the builder
+        null,
+        null,
+        null
+    );
+    final SamplerResponse response = inputSourceSampler.sample(
+        inputSource,
+        new JsonInputFormat(null, null, null, null, null),
+        dataSchema,
+        null
+    );
+
+    Assert.assertEquals(6, response.getNumRowsRead());
+    Assert.assertEquals(5, response.getNumRowsIndexed()); // NOTE(review): one fewer indexed than read; presumably one fixture row fails to parse, confirm against STR_JSON_ROWS
+    Assert.assertEquals(6, response.getData().size());
+    Assert.assertEquals( // logical dimensions: every column is expected as a StringDimensionSchema
+        ImmutableList.of(
+            new StringDimensionSchema("string"),
+            new StringDimensionSchema("long"),
+            new StringDimensionSchema("double"),
+            new StringDimensionSchema("bool"),
+            new StringDimensionSchema("variant"),
+            new StringDimensionSchema("array")
+        ),
+        response.getLogicalDimensions()
+    );
+
+    Assert.assertEquals( // physical dimensions: same expected list as the logical dimensions
+        ImmutableList.of(
+            new StringDimensionSchema("string"),
+            new StringDimensionSchema("long"),
+            new StringDimensionSchema("double"),
+            new StringDimensionSchema("bool"),
+            new StringDimensionSchema("variant"),
+            new StringDimensionSchema("array")
+        ),
+        response.getPhysicalDimensions()
+    );
+    Assert.assertEquals( // logical segment schema: time column plus the six columns, all typed STRING
+        RowSignature.builder()
+                    .addTimeColumn()
+                    .add("string", ColumnType.STRING)
+                    .add("long", ColumnType.STRING)
+                    .add("double", ColumnType.STRING)
+                    .add("bool", ColumnType.STRING)
+                    .add("variant", ColumnType.STRING)
+                    .add("array", ColumnType.STRING)
+                    .build(),
+        response.getLogicalSegmentSchema()
+    );
+  }
+
+  @Test // samples the inline rows with explicitly declared dimensions and checks the response echoes those schemas
+  public void testTypesNoDiscoveryExplicitSchema()
+  {
+    final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n')); // STR_JSON_ROWS joined with newlines
+    final DataSchema dataSchema = new DataSchema(
+        "test",
+        new TimestampSpec("t", null, null),
+        DimensionsSpec.builder().setDimensions( // seven explicitly typed dimensions: string, long, double and nested-data schemas
+            ImmutableList.of(new StringDimensionSchema("string"),
+                             new LongDimensionSchema("long"),
+                             new DoubleDimensionSchema("double"),
+                             new StringDimensionSchema("bool"),
+                             new NestedDataDimensionSchema("variant"),
+                             new NestedDataDimensionSchema("array"),
+                             new NestedDataDimensionSchema("nested")
+            )
+        ).build(),
+        null,
+        null,
+        null
+    );
+    final SamplerResponse response = inputSourceSampler.sample(
+        inputSource,
+        new JsonInputFormat(null, null, null, null, null),
+        dataSchema,
+        null
+    );
+
+    Assert.assertEquals(6, response.getNumRowsRead());
+    Assert.assertEquals(5, response.getNumRowsIndexed()); // NOTE(review): one fewer indexed than read; presumably one fixture row fails to parse, confirm against STR_JSON_ROWS
+    Assert.assertEquals(6, response.getData().size());
+    Assert.assertEquals( // logical dimensions must equal the dimensions declared in the DataSchema
+        dataSchema.getDimensionsSpec().getDimensions(),
+        response.getLogicalDimensions()
+    );
+
+    Assert.assertEquals( // physical dimensions must equal the declared dimensions as well
+        dataSchema.getDimensionsSpec().getDimensions(),
+        response.getPhysicalDimensions()
+    );
+    Assert.assertEquals( // logical segment schema: time column plus one column per declared dimension, typed to match its schema
+        RowSignature.builder()
+                    .addTimeColumn()
+                    .add("string", ColumnType.STRING)
+                    .add("long", ColumnType.LONG)
+                    .add("double", ColumnType.DOUBLE)
+                    .add("bool", ColumnType.STRING)
+                    .add("variant", ColumnType.NESTED_DATA)
+                    .add("array", ColumnType.NESTED_DATA)
+                    .add("nested", ColumnType.NESTED_DATA)
+                    .build(),
+        response.getLogicalSegmentSchema()
+    );
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org