You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by ab...@apache.org on 2023/05/03 08:33:19 UTC
[druid] branch 26.0.0 updated: Web console: allow stringly schemas in the data loader (#14189) (#14203)
This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch 26.0.0
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/26.0.0 by this push:
new 3f58b6b726 Web console: allow stringly schemas in the data loader (#14189) (#14203)
3f58b6b726 is described below
commit 3f58b6b7268a8d30f731392ee6197b831d630a80
Author: Vadim Ogievetsky <va...@ogievetsky.com>
AuthorDate: Wed May 3 01:33:03 2023 -0700
Web console: allow stringly schemas in the data loader (#14189) (#14203)
Backport of #14189 to 26
---
.../src/druid-models/execution/execution.ts | 5 +-
.../ingestion-spec/ingestion-spec.spec.ts | 4 +-
.../druid-models/ingestion-spec/ingestion-spec.tsx | 52 +++++---
.../src/views/load-data-view/info-messages.tsx | 8 +-
.../src/views/load-data-view/load-data-view.tsx | 135 ++++++++++++++++-----
5 files changed, 144 insertions(+), 60 deletions(-)
diff --git a/web-console/src/druid-models/execution/execution.ts b/web-console/src/druid-models/execution/execution.ts
index 97ecc14b2f..fd2c0881dd 100644
--- a/web-console/src/druid-models/execution/execution.ts
+++ b/web-console/src/druid-models/execution/execution.ts
@@ -140,10 +140,11 @@ function formatPendingMessage(
// If there are not enough slots free then there are two cases:
if (totalNeeded <= totalTaskSlots) {
- // (1) not enough free, but enough total: "Launched 2/4 tasks. Cluster is currently using 5/6 task slots. Waiting for 1 other task to finish."
+ // (1) not enough free, but enough total: "Launched 2/4 tasks. Cluster is currently using 5/6 task slots. Waiting for 1 task slot to become available."
const tasksThatNeedToFinish = pendingTasks - availableTaskSlots;
return (
- baseMessage + ` Waiting for ${pluralIfNeeded(tasksThatNeedToFinish, 'other task')} to finish.`
+ baseMessage +
+ ` Waiting for ${pluralIfNeeded(tasksThatNeedToFinish, 'task slot')} to become available.`
);
} else {
// (2) not enough total: "Launched 2/4 tasks. Cluster is currently using 2/2 task slots. Add more capacity or reduce maxNumTasks to 2 or lower."
diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts
index 03a7cd1d26..79a4d93e30 100644
--- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts
+++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.spec.ts
@@ -757,7 +757,7 @@ describe('spec utils', () => {
});
it('updateSchemaWithSample', () => {
- const withRollup = updateSchemaWithSample(ingestionSpec, JSON_SAMPLE, 'specific', true);
+ const withRollup = updateSchemaWithSample(ingestionSpec, JSON_SAMPLE, 'fixed', true);
expect(withRollup).toMatchInlineSnapshot(`
Object {
@@ -822,7 +822,7 @@ describe('spec utils', () => {
}
`);
- const noRollup = updateSchemaWithSample(ingestionSpec, JSON_SAMPLE, 'specific', false);
+ const noRollup = updateSchemaWithSample(ingestionSpec, JSON_SAMPLE, 'fixed', false);
expect(noRollup).toMatchInlineSnapshot(`
Object {
diff --git a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
index de3e39e3fd..8741668b59 100644
--- a/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
+++ b/web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx
@@ -267,14 +267,17 @@ export interface DataSchema {
metricsSpec?: MetricSpec[];
}
-export type DimensionMode = 'specific' | 'auto-detect';
+export type SchemaMode = 'fixed' | 'string-only-discovery' | 'type-aware-discovery';
-export function getDimensionMode(spec: Partial<IngestionSpec>): DimensionMode {
+export function getSchemaMode(spec: Partial<IngestionSpec>): SchemaMode {
if (deepGet(spec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery') === true) {
- return 'auto-detect';
+ return 'type-aware-discovery';
+ }
+ if (deepGet(spec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions') === true) {
+ return 'string-only-discovery';
}
const dimensions = deepGet(spec, 'spec.dataSchema.dimensionsSpec.dimensions') || EMPTY_ARRAY;
- return Array.isArray(dimensions) && dimensions.length === 0 ? 'auto-detect' : 'specific';
+ return Array.isArray(dimensions) && dimensions.length === 0 ? 'string-only-discovery' : 'fixed';
}
export function getRollup(spec: Partial<IngestionSpec>): boolean {
@@ -2418,7 +2421,7 @@ function getTypeHintsFromSpec(spec: Partial<IngestionSpec>): Record<string, stri
export function updateSchemaWithSample(
spec: Partial<IngestionSpec>,
sampleResponse: SampleResponse,
- dimensionMode: DimensionMode,
+ schemaMode: SchemaMode,
rollup: boolean,
forcePartitionInitialization = false,
): Partial<IngestionSpec> {
@@ -2429,20 +2432,31 @@ export function updateSchemaWithSample(
let newSpec = spec;
- if (dimensionMode === 'auto-detect') {
- newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery', true);
- newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions', true);
- newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions', []);
- newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensions');
- } else {
- newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery');
- newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions');
- newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions');
- newSpec = deepSet(
- newSpec,
- 'spec.dataSchema.dimensionsSpec.dimensions',
- getDimensionSpecs(sampleResponse, typeHints, guessNumericStringsAsNumbers, rollup),
- );
+ switch (schemaMode) {
+ case 'type-aware-discovery':
+ newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery', true);
+ newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions');
+ newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions', []);
+ newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensions');
+ break;
+
+ case 'string-only-discovery':
+ newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery');
+ newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions');
+ newSpec = deepSet(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions', []);
+ newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensions');
+ break;
+
+ case 'fixed':
+ newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.useSchemaDiscovery');
+ newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.includeAllDimensions');
+ newSpec = deepDelete(newSpec, 'spec.dataSchema.dimensionsSpec.dimensionExclusions');
+ newSpec = deepSet(
+ newSpec,
+ 'spec.dataSchema.dimensionsSpec.dimensions',
+ getDimensionSpecs(sampleResponse, typeHints, guessNumericStringsAsNumbers, rollup),
+ );
+ break;
}
if (rollup) {
diff --git a/web-console/src/views/load-data-view/info-messages.tsx b/web-console/src/views/load-data-view/info-messages.tsx
index 5c93b7a107..b88cf8a70c 100644
--- a/web-console/src/views/load-data-view/info-messages.tsx
+++ b/web-console/src/views/load-data-view/info-messages.tsx
@@ -20,7 +20,7 @@ import { Button, Callout, Code, FormGroup, Intent } from '@blueprintjs/core';
import React from 'react';
import { ExternalLink, LearnMore } from '../../components';
-import type { DimensionMode, IngestionSpec } from '../../druid-models';
+import type { IngestionSpec, SchemaMode } from '../../druid-models';
import { getIngestionDocLink } from '../../druid-models';
import { getLink } from '../../links';
import { deepGet, deepSet } from '../../utils';
@@ -129,11 +129,11 @@ export const FilterMessage = React.memo(function FilterMessage() {
});
export interface SchemaMessageProps {
- dimensionMode: DimensionMode;
+ schemaMode: SchemaMode;
}
export const SchemaMessage = React.memo(function SchemaMessage(props: SchemaMessageProps) {
- const { dimensionMode } = props;
+ const { schemaMode } = props;
return (
<FormGroup>
@@ -142,7 +142,7 @@ export const SchemaMessage = React.memo(function SchemaMessage(props: SchemaMess
Each column in Druid must have an assigned type (string, long, float, double, complex,
etc).
</p>
- {dimensionMode === 'specific' && (
+ {schemaMode === 'fixed' && (
<p>
Default primitive types have been automatically assigned to your columns. If you want to
change the type, click on the column header.
diff --git a/web-console/src/views/load-data-view/load-data-view.tsx b/web-console/src/views/load-data-view/load-data-view.tsx
index 9d698fe1de..b332d277d3 100644
--- a/web-console/src/views/load-data-view/load-data-view.tsx
+++ b/web-console/src/views/load-data-view/load-data-view.tsx
@@ -56,7 +56,6 @@ import {
} from '../../components';
import { AsyncActionDialog } from '../../dialogs';
import type {
- DimensionMode,
DimensionSpec,
DruidFilter,
FlattenField,
@@ -65,6 +64,7 @@ import type {
InputFormat,
IoConfig,
MetricSpec,
+ SchemaMode,
TimestampSpec,
Transform,
TuningConfig,
@@ -84,7 +84,6 @@ import {
FILTER_FIELDS,
FILTERS_FIELDS,
FLATTEN_FIELD_FIELDS,
- getDimensionMode,
getDimensionSpecName,
getIngestionComboType,
getIngestionImage,
@@ -95,6 +94,7 @@ import {
getMetricSpecName,
getRequiredModule,
getRollup,
+ getSchemaMode,
getSecondaryPartitionRelatedFormFields,
getSpecType,
getTimestampExpressionFields,
@@ -367,7 +367,7 @@ export interface LoadDataViewState {
continueToSpec: boolean;
showResetConfirm: boolean;
newRollup?: boolean;
- newDimensionMode?: DimensionMode;
+ newSchemaMode?: SchemaMode;
// welcome
overlordModules?: string[];
@@ -414,6 +414,7 @@ export interface LoadDataViewState {
selectedMetricSpec?: SelectedIndex<MetricSpec>;
// for final step
+ existingDatasources?: string[];
submitting: boolean;
}
@@ -666,8 +667,10 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
case 'partition':
case 'publish':
case 'tuning':
- case 'spec':
return;
+
+ case 'spec':
+ return this.queryForSpec();
}
}
@@ -1994,7 +1997,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
newSpec = updateSchemaWithSample(
newSpec,
transformQueryState.data,
- 'specific',
+ 'fixed',
typeof currentRollup === 'boolean' ? currentRollup : DEFAULT_ROLLUP_SETTING,
);
}
@@ -2197,7 +2200,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
newSpec = updateSchemaWithSample(
newSpec,
filterQueryState.data,
- 'specific',
+ 'fixed',
typeof currentRollup === 'boolean' ? currentRollup : DEFAULT_ROLLUP_SETTING,
);
}
@@ -2308,7 +2311,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
const somethingSelected = Boolean(
selectedAutoDimension || selectedDimensionSpec || selectedMetricSpec,
);
- const dimensionMode = getDimensionMode(spec);
+ const schemaMode = getSchemaMode(spec);
let mainFill: JSX.Element | string;
if (schemaQueryState.isInit()) {
@@ -2349,7 +2352,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
<>
<div className="main">{mainFill}</div>
<div className="control">
- <SchemaMessage dimensionMode={dimensionMode} />
+ <SchemaMessage schemaMode={schemaMode} />
{!somethingSelected && (
<>
<FormGroupWithInfo
@@ -2375,16 +2378,16 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
}
>
<Switch
- checked={dimensionMode === 'specific'}
+ checked={schemaMode === 'fixed'}
onChange={() =>
this.setState({
- newDimensionMode: dimensionMode === 'specific' ? 'auto-detect' : 'specific',
+ newSchemaMode: schemaMode === 'fixed' ? 'string-only-discovery' : 'fixed',
})
}
- label="Explicitly specify dimension list"
+ label="Explicitly specify schema"
/>
</FormGroupWithInfo>
- {dimensionMode === 'auto-detect' && (
+ {schemaMode !== 'fixed' && (
<AutoForm
fields={[
{
@@ -2470,7 +2473,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
<FormGroup>
<Button
text="Add dimension"
- disabled={dimensionMode !== 'specific'}
+ disabled={schemaMode !== 'fixed'}
onClick={() => {
this.setState({
selectedDimensionSpec: {
@@ -2606,7 +2609,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
action={async () => {
const sampleResponse = await sampleForTransform(spec, cacheRows);
this.updateSpec(
- updateSchemaWithSample(spec, sampleResponse, getDimensionMode(spec), newRollup, true),
+ updateSchemaWithSample(spec, sampleResponse, getSchemaMode(spec), newRollup, true),
);
}}
confirmButtonText={`Yes - ${newRollup ? 'enable' : 'disable'} rollup`}
@@ -2622,32 +2625,55 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
}
renderChangeDimensionModeAction() {
- const { newDimensionMode, spec, cacheRows } = this.state;
- if (typeof newDimensionMode === 'undefined' || !cacheRows) return;
- const autoDetect = newDimensionMode === 'auto-detect';
+ const { newSchemaMode, spec, cacheRows } = this.state;
+ if (!newSchemaMode || !cacheRows) return;
+ const autoDetect = newSchemaMode !== 'fixed';
return (
<AsyncActionDialog
action={async () => {
const sampleResponse = await sampleForTransform(spec, cacheRows);
this.updateSpec(
- updateSchemaWithSample(spec, sampleResponse, newDimensionMode, getRollup(spec)),
+ updateSchemaWithSample(spec, sampleResponse, newSchemaMode, getRollup(spec)),
);
}}
- confirmButtonText={`Yes - ${autoDetect ? 'auto detect' : 'explicitly set'} columns`}
- successText={`Dimension mode changes to ${
- autoDetect ? 'auto detect' : 'specific list'
- }. Schema has been updated.`}
- failText="Could change dimension mode"
+ confirmButtonText={`Yes - ${autoDetect ? 'auto detect' : 'explicitly define'} schema`}
+ successText={`Schema mode changed to ${autoDetect ? 'auto detect' : 'explicitly defined'}.`}
+ failText="Could not change schema mode"
intent={Intent.WARNING}
- onClose={() => this.setState({ newDimensionMode: undefined })}
+ onClose={() => this.setState({ newSchemaMode: undefined })}
>
<p>
{autoDetect
- ? `Are you sure you don't want to explicitly specify a dimension list?`
- : `Are you sure you want to explicitly specify a dimension list?`}
+ ? `Are you sure you want Druid to auto detect the data schema?`
+ : `Are you sure you want to explicitly specify a schema?`}
</p>
- <p>Making this change will reset any work you have done in this section.</p>
+ <p>Making this change will reset all schema configuration done so far.</p>
+ {autoDetect && (
+ <Switch
+ checked={newSchemaMode === 'type-aware-discovery'}
+ onChange={() => {
+ this.setState({
+ newSchemaMode:
+ newSchemaMode === 'string-only-discovery'
+ ? 'type-aware-discovery'
+ : 'string-only-discovery',
+ });
+ }}
+ >
+ Use the new type-aware schema discovery capability. Avoid this if you are appending to a
+ datasource created with string-only schema discovery of Druid and want to preserve
+ schema compatibility. For more information see the{' '}
+ <ExternalLink
+ href={`${getLink(
+ 'DOCS',
+ )}/ingestion/schema-design.html#schema-auto-discovery-for-dimensions`}
+ >
+ documentation
+ </ExternalLink>
+ .
+ </Switch>
+ )}
</AsyncActionDialog>
);
}
@@ -2688,7 +2714,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
renderDimensionSpecControls() {
const { spec, selectedDimensionSpec } = this.state;
if (!selectedDimensionSpec) return;
- const dimensionMode = getDimensionMode(spec);
+ const schemaMode = getSchemaMode(spec);
const dimensions = deepGet(spec, `spec.dataSchema.dimensionsSpec.dimensions`) || EMPTY_ARRAY;
@@ -2709,7 +2735,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
const convertToMetric = (type: string, prefix: string) => {
const specWithoutDimension =
- dimensionMode === 'specific'
+ schemaMode === 'fixed'
? deepDelete(
spec,
`spec.dataSchema.dimensionsSpec.dimensions.${selectedDimensionSpec.index}`,
@@ -2811,7 +2837,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
renderMetricSpecControls() {
const { spec, selectedMetricSpec } = this.state;
if (!selectedMetricSpec) return;
- const dimensionMode = getDimensionMode(spec);
+ const schemaMode = getSchemaMode(spec);
const selectedMetricSpecFieldName = selectedMetricSpec.value.fieldName;
const convertToDimension = (type: string) => {
@@ -2862,7 +2888,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
}
>
{selectedMetricSpec.index !== -1 &&
- dimensionMode === 'specific' &&
+ schemaMode === 'fixed' &&
selectedMetricSpecFieldName && (
<FormGroup>
<Popover2 content={convertToDimensionMenu}>
@@ -3242,9 +3268,24 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
}
};
+ async queryForSpec() {
+ let existingDatasources: string[];
+ try {
+ existingDatasources = (await Api.instance.get<string[]>('/druid/coordinator/v1/datasources'))
+ .data;
+ } catch {
+ return;
+ }
+
+ this.setState({
+ existingDatasources,
+ });
+ }
+
renderSpecStep() {
- const { spec, submitting } = this.state;
+ const { spec, existingDatasources, submitting } = this.state;
const issueWithSpec = getIssueWithSpec(spec);
+ const datasource = deepGet(spec, 'spec.dataSchema.dataSource');
return (
<>
@@ -3267,6 +3308,34 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
>{`There is an issue with the spec: ${issueWithSpec}`}</Callout>
</FormGroup>
)}
+ {getSchemaMode(spec) === 'type-aware-discovery' &&
+ existingDatasources?.includes(datasource) && (
+ <FormGroup>
+ <Callout intent={Intent.WARNING}>
+ <p>
+ You have enabled type-aware schema discovery (
+ <Code>useSchemaDiscovery: true</Code>) to ingest data into the existing
+ datasource <Code>{datasource}</Code>.
+ </p>
+ <p>
+ If you used string-based schema discovery when first ingesting data to{' '}
+ <Code>{datasource}</Code>, using type-aware schema discovery now can cause
+ problems with the values multi-value string dimensions.
+ </p>
+ <p>
+ For more information see the{' '}
+ <ExternalLink
+ href={`${getLink(
+ 'DOCS',
+ )}/ingestion/schema-design.html#schema-auto-discovery-for-dimensions`}
+ >
+ documentation
+ </ExternalLink>
+ .
+ </p>
+ </Callout>
+ </FormGroup>
+ )}
<AppendToExistingIssue spec={spec} onChangeSpec={this.updateSpec} />
</div>
<div className="next-bar">
@@ -3279,7 +3348,7 @@ export class LoadDataView extends React.PureComponent<LoadDataViewProps, LoadDat
/>
)}
<Button
- text="Submit"
+ text={submitting ? 'Submitting...' : 'Submit'}
rightIcon={IconNames.CLOUD_UPLOAD}
intent={Intent.PRIMARY}
disabled={submitting || Boolean(issueWithSpec)}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org