You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by ka...@apache.org on 2023/04/23 15:58:17 UTC
[druid] branch master updated: preserve explicitly specified dimension schema in "logical" schema of sampler response (#14144)
This is an automated email from the ASF dual-hosted git repository.
karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 887f8db1b5 preserve explicitly specified dimension schema in "logical" schema of sampler response (#14144)
887f8db1b5 is described below
commit 887f8db1b52b5d9cdc978ec71a528d6993290647
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Sun Apr 23 08:58:05 2023 -0700
preserve explicitly specified dimension schema in "logical" schema of sampler response (#14144)
---
.../overlord/sampler/InputSourceSampler.java | 17 ++-
.../sampler/InputSourceSamplerDiscoveryTest.java | 115 +++++++++++++++++++++
2 files changed, 128 insertions(+), 4 deletions(-)
diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
index 32033fe17a..a84cb4e197 100644
--- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
+++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/sampler/InputSourceSampler.java
@@ -247,10 +247,19 @@ public class InputSourceSampler
if (!SamplerInputRow.SAMPLER_ORDERING_COLUMN.equals(dimensionDesc.getName())) {
final ColumnType columnType = dimensionDesc.getCapabilities().toColumnType();
signatureBuilder.add(dimensionDesc.getName(), columnType);
- // for now, use legacy types instead of standard type
- logicalDimensionSchemas.add(
- DimensionSchema.getDefaultSchemaForBuiltInType(dimensionDesc.getName(), dimensionDesc.getCapabilities())
- );
+ // use explicitly specified dimension schema if it exists
+ if (dataSchema != null &&
+ dataSchema.getDimensionsSpec() != null &&
+ dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()) != null) {
+ logicalDimensionSchemas.add(dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()));
+ } else {
+ logicalDimensionSchemas.add(
+ DimensionSchema.getDefaultSchemaForBuiltInType(
+ dimensionDesc.getName(),
+ dimensionDesc.getCapabilities()
+ )
+ );
+ }
physicalDimensionSchemas.add(
dimensionDesc.getIndexer().getFormat().getColumnSchema(dimensionDesc.getName())
);
diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerDiscoveryTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerDiscoveryTest.java
index c1a7ebdde1..ee9c23ab31 100644
--- a/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerDiscoveryTest.java
+++ b/indexing-service/src/test/java/org/apache/druid/indexing/overlord/sampler/InputSourceSamplerDiscoveryTest.java
@@ -33,6 +33,7 @@ import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.segment.AutoTypeColumnSchema;
+import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.indexing.DataSchema;
@@ -183,4 +184,118 @@ public class InputSourceSamplerDiscoveryTest extends InitializedNullHandlingTest
ExpressionProcessing.initializeForTests();
}
}
+
+ @Test
+ public void testTypesClassicDiscovery() // samples STR_JSON_ROWS with no dimensions declared and checks the reported schemas
+ {
+ final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n')); // one inline row per line
+ final DataSchema dataSchema = new DataSchema(
+ "test",
+ new TimestampSpec("t", null, null),
+ DimensionsSpec.builder().build(), // builder left untouched: no dimensions are listed explicitly
+ null,
+ null,
+ null
+ );
+ final SamplerResponse response = inputSourceSampler.sample(
+ inputSource,
+ new JsonInputFormat(null, null, null, null, null),
+ dataSchema,
+ null
+ );
+
+ Assert.assertEquals(6, response.getNumRowsRead());
+ Assert.assertEquals(5, response.getNumRowsIndexed()); // NOTE(review): presumably one row of STR_JSON_ROWS is not indexable; fixture not visible here, confirm
+ Assert.assertEquals(6, response.getData().size());
+ Assert.assertEquals( // logical dimensions: every column is expected as a string dimension
+ ImmutableList.of(
+ new StringDimensionSchema("string"),
+ new StringDimensionSchema("long"),
+ new StringDimensionSchema("double"),
+ new StringDimensionSchema("bool"),
+ new StringDimensionSchema("variant"),
+ new StringDimensionSchema("array")
+ ),
+ response.getLogicalDimensions()
+ );
+
+ Assert.assertEquals( // physical dimensions: expected to be the same string-only list as the logical ones
+ ImmutableList.of(
+ new StringDimensionSchema("string"),
+ new StringDimensionSchema("long"),
+ new StringDimensionSchema("double"),
+ new StringDimensionSchema("bool"),
+ new StringDimensionSchema("variant"),
+ new StringDimensionSchema("array")
+ ),
+ response.getPhysicalDimensions()
+ );
+ Assert.assertEquals( // segment signature: time column followed by STRING for each of the six columns
+ RowSignature.builder()
+ .addTimeColumn()
+ .add("string", ColumnType.STRING)
+ .add("long", ColumnType.STRING)
+ .add("double", ColumnType.STRING)
+ .add("bool", ColumnType.STRING)
+ .add("variant", ColumnType.STRING)
+ .add("array", ColumnType.STRING)
+ .build(),
+ response.getLogicalSegmentSchema()
+ );
+ }
+
+ @Test
+ public void testTypesNoDiscoveryExplicitSchema() // samples STR_JSON_ROWS with every dimension declared explicitly and checks the declarations are reported back
+ {
+ final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n')); // one inline row per line
+ final DataSchema dataSchema = new DataSchema(
+ "test",
+ new TimestampSpec("t", null, null),
+ DimensionsSpec.builder().setDimensions( // seven explicitly typed dimensions, including a "nested" column
+ ImmutableList.of(new StringDimensionSchema("string"),
+ new LongDimensionSchema("long"),
+ new DoubleDimensionSchema("double"),
+ new StringDimensionSchema("bool"),
+ new NestedDataDimensionSchema("variant"),
+ new NestedDataDimensionSchema("array"),
+ new NestedDataDimensionSchema("nested")
+ )
+ ).build(),
+ null,
+ null,
+ null
+ );
+ final SamplerResponse response = inputSourceSampler.sample(
+ inputSource,
+ new JsonInputFormat(null, null, null, null, null),
+ dataSchema,
+ null
+ );
+
+ Assert.assertEquals(6, response.getNumRowsRead());
+ Assert.assertEquals(5, response.getNumRowsIndexed()); // NOTE(review): presumably one row of STR_JSON_ROWS is not indexable; fixture not visible here, confirm
+ Assert.assertEquals(6, response.getData().size());
+ Assert.assertEquals( // logical dimensions must equal the declared list exactly
+ dataSchema.getDimensionsSpec().getDimensions(),
+ response.getLogicalDimensions()
+ );
+
+ Assert.assertEquals( // physical dimensions are expected to equal the declared list as well
+ dataSchema.getDimensionsSpec().getDimensions(),
+ response.getPhysicalDimensions()
+ );
+ Assert.assertEquals( // segment signature: time column, then the column type matching each declared schema
+ RowSignature.builder()
+ .addTimeColumn()
+ .add("string", ColumnType.STRING)
+ .add("long", ColumnType.LONG)
+ .add("double", ColumnType.DOUBLE)
+ .add("bool", ColumnType.STRING)
+ .add("variant", ColumnType.NESTED_DATA)
+ .add("array", ColumnType.NESTED_DATA)
+ .add("nested", ColumnType.NESTED_DATA)
+ .build(),
+ response.getLogicalSegmentSchema()
+ );
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org